Skip to content

Commit 3fc72be

Browse files
committed
Cleanup hash conditions
1 parent ae29606 commit 3fc72be

File tree

10 files changed

+180
-57
lines changed

10 files changed

+180
-57
lines changed

README.md

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# MyST Libre
32

43
![PyPI - Version](https://img.shields.io/pypi/v/myst-libre?style=flat&logo=python&logoColor=white&logoSize=8&labelColor=rgb(255%2C0%2C0)&color=white)
@@ -45,21 +44,44 @@ DOCKER_PRIVATE_REGISTRY_PASSWORD=your_password
4544

4645
**Import libraries and define REES resources**
4746

47+
Minimal example to create a `REES` object:
48+
4849
```python
4950
from myst_libre.tools import JupyterHubLocalSpawner, MystMD
5051
from myst_libre.rees import REES
5152
from myst_libre.builders import MystBuilder
5253

53-
rees_resources = REES(dict(
54+
rees = REES(dict(
5455
registry_url="https://your-registry.io",
55-
gh_user_repo_name = "owner/repository",
56-
gh_repo_commit_hash = "full_SHA_commit_A",
57-
binder_image_tag = "full_SHA_commit_A_or_B",
58-
dotenv = '/path/to/dotenv'))
56+
gh_user_repo_name = "owner/repository"
57+
))
5958
```
6059

61-
> [!NOTE]
62-
> Currently, the assumption is that the Docker image was built by binderhub from a REES-compliant repository that also includes the MyST content. Therefore, `binder_image_tag` and `gh_repo_commit_hash` are simply two different commits in the same (`gh_repo_user_name`) repository. However, `binder_image_tag` is not allowed to be ahead of `gh_repo_commit_hash`.
60+
Other optional parameters that can be passed to the REES constructor:
61+
62+
63+
- `gh_repo_commit_hash`: Full SHA commit hash of the `gh_user_repo_name` repository (optional, default: latest commit)
64+
- `binder_image_tag`: Full SHA commit hash at which a binder tag is available for the "found image name" (optional, default: latest)
65+
- `binder_image_name_override`: Override the "found image name" whose container will be used to build the MyST article (optional, default: None)
66+
- `dotenv`: Path to a directory containing the .env file for authentication credentials to pull images from `registry_url` (optional, default: None)
67+
- `bh_image_prefix`: BinderHub names the images it builds with a prefix, e.g., `<prefix>agahkarakuzu-2dmriscope-7a73fb`, where the prefix is typically set to `binder-`. This prefix is used in the regex pattern that finds the BinderHub-built image name in the `registry_url`. See the [reference docs](https://binderhub.readthedocs.io/en/latest/zero-to-binderhub/setup-binderhub.html) for more details.
68+
- `bh_private_project_name`: Project name that is prepended to the found image name when pulling the image. See [this issue](https://github.com/jupyterhub/binderhub/issues/800) (optional, default: [`registry_url` without `http://` or `https://`])
69+
70+
71+
Note on `bh_image_prefix`: in this context, "prefix" does not mean the same thing as it does in the reference docs. (optional, default: `binder-`)
72+
73+
**Image Selection Order**
74+
75+
1. If the `myst.yml` file in the `gh_user_repo_name` repository contains `project/thebe/binder/repo`, this image is prioritized.
76+
2. If `project/thebe/binder/repo` is not specified, the `gh_user_repo_name` is used as the image name.
77+
78+
Note that if (2) is the case, your build command probably should not be `myst build`, but you can still use other builders, e.g., `jupyter-book build`.
79+
80+
If you specify `binder_image_name_override`, it takes precedence over both options above and is used as the repository name to locate the image.
81+
82+
This allows you to build the MyST article using a runtime from a different repository than the one specified in `gh_user_repo_name`: either the repository defined in `myst.yml` or the one given by `binder_image_name_override`.
83+
84+
Setting `binder_image_tag` to `latest` selects the most recent successful build of an image that meets the specified conditions. The repository content (i.e., the commit at `gh_repo_commit_hash`) might be more recent than the `binder_image_tag`, but the same binder image can be reused.
6385

6486
**Fetch resources and spawn JupyterHub in the respective container**
6587

example.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,32 @@
44

55

66

7-
resources = REES(dict(
8-
registry_url="https://binder-registry.conp.cloud",
9-
gh_user_repo_name = "agahkarakuzu/mriscope",
10-
gh_repo_commit_hash = "ae64d9ed17e6ce66ecf94d585d7b68a19a435d70",
11-
binder_image_tag = "489ae0eb0d08fe30e45bc31201524a6570b9b7dd"))
7+
rees = REES(dict(
8+
registry_url="https://binder-registry.conp.cloud",
9+
gh_user_repo_name = "agahkarakuzu/mriscope",
10+
dotenv = "/Users/agah/Desktop/neurolibre/myst_libre",
11+
bh_project_name = "binder-registry.conp.cloud"
12+
))
1213

1314

14-
hub = JupyterHubLocalSpawner(resources,
15-
host_data_parent_dir = "/Users/agah/Desktop/tmp/DATA",
16-
host_build_source_parent_dir = '/Users/agah/Desktop/tmp',
17-
container_data_mount_dir = '/home/jovyan/data',
18-
container_build_source_mount_dir = '/home/jovyan')
15+
# Under the hood it looks like this:
16+
# if rees.search_img_by_repo_name():
17+
# print("🐳 Image name:",rees.found_image_name)
18+
# print("🏷️ Unsorted tags:",rees.found_image_tags)
1919

20-
hub.spawn_jupyter_hub()
20+
# if rees.get_tags_sorted_by_date():
21+
# print("🏷️ Sorted image tags:",rees.found_image_tags_sorted)
2122

22-
MystBuilder(hub).build_site()
23+
hub = JupyterHubLocalSpawner(rees,
24+
host_build_source_parent_dir = "/Users/agah/Desktop/tmp",
25+
container_build_source_mount_dir = '/home/jovyan',
26+
host_data_parent_dir = "/Users/agah/Desktop/tmp/DATA",
27+
container_data_mount_dir = '/home/jovyan/data')
28+
29+
# # This has to be called
30+
hub_logs = hub.spawn_jupyter_hub()
31+
32+
builder = MystBuilder(hub=hub)
33+
myst_logs = builder.build('--execute','--html')
34+
35+
print(myst_logs)

myst_libre/abstract_class.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,13 @@ def set_log_level(self, level):
3636
self.logging_level = level
3737
self.logger = logging.basicConfig(level=self.logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
3838

39-
def cprint(self, message, color):
39+
def cprint(self, message, color,highlight=None):
4040
"""
4141
Print a message in a specified color using termcolor.
4242
4343
Args:
4444
message (str): The message to print.
4545
color (str): The color to use for printing the message.
46+
highlight (str): The highlight to use for printing the message.
4647
"""
47-
print(colored(message, color))
48+
print(colored(message, color, highlight))

myst_libre/rees/rees.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,40 @@ def __init__(self, rees_dict):
1010
# These are needed in the scope of the base classes
1111
self.registry_url = rees_dict['registry_url']
1212
self.gh_user_repo_name = rees_dict['gh_user_repo_name']
13-
self.gh_repo_commit_hash = rees_dict['gh_repo_commit_hash']
14-
self.binder_image_tag = rees_dict['binder_image_tag']
15-
self.binder_image_name = rees_dict.get('binder_image_name', None)
16-
13+
self.gh_repo_commit_hash = rees_dict.get('gh_repo_commit_hash', "latest")
14+
self.binder_image_name_override = rees_dict.get('binder_image_name_override', None)
15+
self.binder_image_tag = rees_dict.get('binder_image_tag', "latest")
16+
self.bh_private_project_name = rees_dict.get('bh_private_project_name', None)
17+
self.bh_image_prefix = rees_dict.get('bh_image_prefix', "binder-")
18+
self.bh_project_name = rees_dict.get('bh_project_name', None)
19+
1720
if 'dotenv' in rees_dict.keys():
1821
self.dotenvloc = rees_dict['dotenv']
1922

2023
# Initialize as base to rees
2124
BuildSourceManager.__init__(self)
2225
DockerRegistryClient.__init__(self)
2326

27+
# Get the latest commit hash from the GitHub API
28+
if self.gh_repo_commit_hash == "latest":
29+
self.cprint("🔎 Searching for latest commit hash in GitHub.","light_blue")
30+
api_url = f'https://api.github.com/repos/{self.gh_user_repo_name}/commits/{self.branch}'
31+
response = self.rest_client.get(api_url)
32+
if response.status_code == 200:
33+
latest_commit_hash = response.json()['sha']
34+
self.gh_repo_commit_hash = latest_commit_hash
35+
self.cprint(f"🎉 Found latest commit hash: {self.gh_repo_commit_hash}","white","on_blue")
36+
37+
if self.search_img_by_repo_name():
38+
if self.binder_image_tag == "latest":
39+
self.cprint("🔎 Searching for latest image tag in registry.","light_blue")
40+
if self.get_tags_sorted_by_date():
41+
self.cprint(f"🎉 Found latest tag in {self.registry_url}","light_blue")
42+
self.cprint(f"🏷️ Latest runtime tag {self.found_image_tags_sorted[0]} for {self.found_image_name}","white","on_blue")
43+
self.binder_image_tag = self.found_image_tags_sorted[0]
44+
else:
45+
raise Exception(f"[ERROR] A docker image has not been found for {self.gh_user_repo_name} at {self.binder_image_tag}.")
46+
2447
self.cprint(f"␤[Preflight checks]","light_grey")
2548
self.check_docker_installed()
2649

@@ -58,10 +81,10 @@ def pull_image(self):
5881
"""
5982
if bool(self._auth) or not self.use_public_registry:
6083
self.login_to_registry()
61-
self.logger.info(f"Logging into {self.registry_url_bare}")
84+
self.logger.info(f"Logging into {self.registry_url}")
6285

6386
try:
64-
self.pull_image_name = f'{self.registry_url_bare}/{self.found_image_name}'
87+
self.pull_image_name = f'{self.bh_private_project_name}/{self.found_image_name}'
6588
self.logger.info(f'Pulling image {self.pull_image_name}:{self.binder_image_tag} from {self.registry_url}.')
6689
self.docker_image = self.docker_client.images.pull(self.pull_image_name, tag=self.binder_image_tag)
6790
except:

myst_libre/tools/build_source_manager.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,17 @@ def git_clone_repo(self,clone_parent_directory):
5454
self.build_dir = os.path.join(self.host_build_source_parent_dir, self.username, self.repo_name, self.gh_repo_commit_hash)
5555

5656
if os.path.exists(self.build_dir):
57-
self.cprint(f'Source {self.build_dir} already exists.', "yellow")
57+
self.cprint(f'Source {self.build_dir} already exists.', "black","on_yellow")
5858
self.repo_object = Repo(self.build_dir)
59+
if os.path.exists(os.path.join(self.build_dir, '_build/html')):
60+
self.cprint(f'⛔️ A build already exists at this commit, terminating...', "white","on_light_red")
61+
raise Exception("A build already exists at this commit")
5962
else:
6063
os.makedirs(os.path.dirname(self.build_dir), exist_ok=True)
6164
self.cprint(f'Cloning into {self.build_dir}', "green")
6265
self.repo_object = Repo.clone_from(f'{self.provider}/{self.gh_user_repo_name}', self.build_dir)
6366

6467
self.set_commit_info()
65-
self.validate_commits()
6668

6769
def git_checkout_commit(self):
6870
"""
@@ -103,20 +105,14 @@ def repo2data_download(self,target_directory):
103105
repo2data.install()
104106

105107
def set_commit_info(self):
106-
if self.binder_image_name:
108+
if self.binder_image_name_override:
107109
self.binder_commit_info['datetime'] = "20 November 2024"
108110
self.binder_commit_info['message'] = "Base runtime from myst-libre"
109111
else:
110112
self.binder_commit_info['datetime'] = self.repo_object.commit(self.binder_image_tag).committed_datetime
111113
self.binder_commit_info['message'] = self.repo_object.commit(self.binder_image_tag).message
112114
self.repo_commit_info['datetime'] = self.repo_object.commit(self.gh_repo_commit_hash).committed_datetime
113115
self.repo_commit_info['message'] = self.repo_object.commit(self.gh_repo_commit_hash).message
114-
self.validate_commits()
115-
116-
def validate_commits(self):
117-
if not self.binder_image_name:
118-
if self.repo_commit_info['datetime'] < self.binder_commit_info['datetime']:
119-
raise ValueError("The repo commit datetime cannot be older than the binder commit datetime.")
120116

121117
def create_latest_symlink(self):
122118
"""

myst_libre/tools/docker_registry_client.py

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
"""
66

77
import re
8+
import datetime
9+
import yaml
10+
811
from .rest_client import RestClient
912
from .authenticator import Authenticator
1013
from .decorators import request_set_decorator
@@ -30,10 +33,11 @@ def __init__(self, **kwargs):
3033
else:
3134
super().__init__()
3235
self.rest_client = RestClient()
33-
34-
self.registry_url_bare = self.registry_url.replace("http://", "").replace("https://", "")
36+
3537
self.found_image_name = None
3638
self.found_image_tags = None
39+
self.found_image_tags_sorted = None
40+
self.myst_yml_dict = None
3741
self.docker_images = []
3842

3943

@@ -60,16 +64,36 @@ def search_img_by_repo_name(self):
6064
bool: True if image found, else False.
6165
6266
Assuming use in rees, TODO refactored for clarity:
63-
binder_image_name
67+
binder_image_name_override
6468
gh_user_repo_name
6569
"""
6670
self.get_image_list()
67-
if self.binder_image_name:
68-
src_name = self.binder_image_name
71+
if self.binder_image_name_override:
72+
src_name = self.binder_image_name_override
6973
else:
70-
src_name = self.gh_user_repo_name
74+
# If not overridden, the (thebe/binder)repo in myst.yml takes precedence
75+
if self.get_myst_yml_as_dict():
76+
src_name = self.myst_yml_dict['project']['thebe']['binder']['repo']
77+
if src_name:
78+
self.cprint(f"🥳 Using project::thebe::binder::repo from myst config to look for 🐳 in {self.registry_url}: {src_name}","light_blue")
79+
else:
80+
# Fallback to the GitHub user/repo name
81+
src_name = self.gh_user_repo_name
82+
self.cprint(f"ℹ️ project::thebe::binder::repo not found in myst.yml, using GitHub {self.gh_user_repo_name} name to look for 🐳 in {self.registry_url}.","light_blue")
83+
self.cprint("‼️ IMPORTANT WARNING: Myst build may succeed, but binder-specific config must be added to myst.yml to attach the built page to a proper runtime.","light_red")
84+
return False
85+
else:
86+
# Well, build will most likely fail, but we can try
87+
self.cprint("⚠️ myst.yml not found, using GitHub user/repo name for 🐳.","light_red")
88+
src_name = self.gh_user_repo_name
89+
7190
user_repo_formatted = src_name.replace('-', '-2d').replace('_', '-5f').replace('/', '-2d')
72-
pattern = f'{self.registry_url_bare}/binder-{user_repo_formatted}.*'
91+
if self.bh_project_name:
92+
pattern = f'{self.bh_project_name}/{self.bh_image_prefix}{user_repo_formatted}.*'
93+
else:
94+
pattern = f'{self.bh_image_prefix}{user_repo_formatted}.*'
95+
96+
print(pattern)
7397
for image in self.docker_images:
7498
if re.match(pattern, image):
7599
self.found_image_name = image
@@ -98,3 +122,52 @@ def list_tags(self):
98122
"""
99123
tags_url = f"{self.registry_url}/v2/{self.found_image_name}/tags/list"
100124
return self.rest_client.get(tags_url)
125+
126+
#@request_set_decorator(success_status_code=200, set_attribute="found_image_tags_sorted", json_key="tags")
127+
def get_tags_sorted_by_date(self):
128+
tags = self.list_tags()
129+
tag_dates = []
130+
for tag in tags:
131+
manifest_url = f"{self.registry_url}/v2/{self.found_image_name}/manifests/{tag}"
132+
headers = {'Accept': 'application/vnd.docker.distribution.manifest.v2+json'}
133+
manifest = self.rest_client.get(manifest_url, headers=headers).json()
134+
135+
config_digest = manifest['config']['digest']
136+
config_url = f"{self.registry_url}/v2/{self.found_image_name}/blobs/{config_digest}"
137+
config = self.rest_client.get(config_url).json()
138+
139+
created = config.get('created', None)
140+
if created:
141+
# Truncate the fractional seconds to 6 digits
142+
created = re.sub(r'(\.\d{6})\d+', r'\1', created)
143+
dt = datetime.datetime.fromisoformat(created.rstrip('Z'))
144+
tag_dates.append((tag, dt))
145+
sorted_tags = sorted(tag_dates, key=lambda x: x[1], reverse=True)
146+
self.found_image_tags_sorted = [tag for tag, _ in sorted_tags]
147+
return bool(tag_dates)
148+
149+
def get_myst_yml_as_dict(self):
150+
"""
151+
Get the myst.yml file as a dictionary.
152+
153+
Returns:
154+
dict: The parsed myst.yml file as a dictionary.
155+
"""
156+
# This is a helper function to get the myst.yml file as a dictionary
157+
# Maybe move it to a helper file, but this whole project needs a refactor anyway.
158+
# I know it makes your eyes bleed, sorry.
159+
user, repo = self.gh_user_repo_name.split('/')
160+
url = f"https://raw.githubusercontent.com/{user}/{repo}/main/myst.yml"
161+
response = self.rest_client.get(url)
162+
163+
if response.status_code == 200:
164+
try:
165+
# Parse the YAML content into a dictionary
166+
self.myst_yml_dict = yaml.safe_load(response.text)
167+
return True
168+
except yaml.YAMLError as e:
169+
self.logger.error(f"Error parsing YAML: {e}")
170+
return False
171+
else:
172+
self.logger.error(f"Failed to fetch myst.yml: {response.status_code}")
173+
return False

myst_libre/tools/jupyter_hub_local_spawner.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ def spawn_jupyter_hub(self, jb_build_command=None):
8282
this_entrypoint = f"/bin/sh -c 'jupyter-book build --all --verbose --path-output {self.container_build_source_mount_dir} content 2>&1 | tee -a jupyter_book_build.log'"
8383
else:
8484
this_entrypoint = f'jupyter server --allow-root --ip 0.0.0.0 --log-level=DEBUG --IdentityProvider.token="{self.jh_token}" --ServerApp.port="{self.port}"'
85-
86-
if not self.rees.search_img_by_repo_name():
87-
raise Exception(f"[ERROR] A docker image has not been found for {self.rees.gh_user_repo_name} at {self.rees.binder_image_tag}.")
88-
if self.rees.binder_image_tag not in self.rees.found_image_tags:
89-
raise Exception(f"[ERROR] A docker image exists for {self.rees.gh_user_repo_name}, yet the tag {self.rees.binder_image_tag} is missing.")
9085

9186
# self.rees.found_image_name is assigned if above not fails
9287

@@ -139,8 +134,8 @@ def log_and_print(message, color=None):
139134
log_and_print(f' ├───────── ✸ {self.rees.pull_image_name}', 'light_blue')
140135
log_and_print(f' ├───────── ⎌ {self.rees.binder_image_tag}', 'light_blue')
141136
log_and_print(f" ├───────── ⏲ {self.rees.binder_commit_info['datetime']}: {self.rees.binder_commit_info['message']}".replace('\n', ''), 'light_blue')
142-
if self.rees.binder_image_name:
143-
log_and_print(f' └───────── ℹ Using NeuroLibre base image {self.rees.binder_image_name}', 'yellow')
137+
if self.rees.binder_image_name_override:
138+
log_and_print(f' └───────── ℹ Using NeuroLibre base image {self.rees.binder_image_name_override}', 'yellow')
144139
else:
145140
log_and_print(f' └───────── ℹ This image was built from REES-compliant {self.rees.gh_user_repo_name} repository at the commit above', 'yellow')
146141
except Exception as e:

myst_libre/tools/rest_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(self,dotenvloc = '.'):
2323
self.session = requests.Session()
2424
self.session.auth = HTTPBasicAuth(self._auth['username'], self._auth['password'])
2525

26-
def get(self, url):
26+
def get(self, url,headers=None):
2727
"""
2828
Perform a GET request.
2929
@@ -33,7 +33,7 @@ def get(self, url):
3333
Returns:
3434
Response: HTTP response object.
3535
"""
36-
response = self.session.get(url)
36+
response = self.session.get(url,headers=headers)
3737
return response
3838

3939
def post(self, url, data=None, json=None):

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@ dependencies = [
2525
"PyGithub",
2626
"termcolor",
2727
"mystmd",
28-
"repo2data"
28+
"repo2data",
29+
"pyyaml"
2930
]
3031

3132
[project.urls]
32-
Homepage = "https://github.com/agahkarakuzu/myst_libre"
33+
Homepage = "https://github.com/neurolibre/myst_libre"
3334

3435
[tool.setuptools_scm]
3536
version_scheme = "post-release"

0 commit comments

Comments
 (0)