From c636fe0f351b71bd961d4df1d571a38a762e90d1 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 23 Jun 2024 14:52:12 +0200 Subject: [PATCH] `ultralytics 8.2.40` refactor HUB code into callbacks (#13896) Signed-off-by: Glenn Jocher Co-authored-by: UltralyticsAssistant --- .github/workflows/ci.yaml | 2 + docs/en/reference/hub/utils.md | 4 +- docs/en/reference/utils/callbacks/hub.md | 4 ++ ultralytics/__init__.py | 2 +- ultralytics/engine/model.py | 27 +--------- ultralytics/hub/__init__.py | 17 ++++++- ultralytics/hub/session.py | 65 ++++++++++++++---------- ultralytics/utils/callbacks/hub.py | 11 +++- 8 files changed, 75 insertions(+), 57 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e40ea2df..a7c65661 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -61,6 +61,7 @@ jobs: run: python docs/build_reference.py - name: Commit and Push Reference Section Changes run: | + git pull origin ${{ github.head_ref || github.ref }} git add . git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token git config --global user.name "UltralyticsAssistant" @@ -77,6 +78,7 @@ jobs: continue-on-error: true if: always() && github.event_name == 'pull_request' run: | + git pull origin ${{ github.head_ref || github.ref }} git add --update # only add updated files git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! git diff --staged --quiet; then diff --git a/docs/en/reference/hub/utils.md b/docs/en/reference/hub/utils.md index 4b4d8375..d7f8a9d6 100644 --- a/docs/en/reference/hub/utils.md +++ b/docs/en/reference/hub/utils.md @@ -1,6 +1,6 @@ --- -description: Explore the utilities in the Ultralytics Hub. Learn about smart_request, request_with_credentials, and more to enhance your YOLO projects. -keywords: Ultralytics, Hub, Utilities, YOLO, smart_request, request_with_credentials +description: Explore the utilities in the Ultralytics HUB. Learn about smart_request, request_with_credentials, and more to enhance your YOLO projects. +keywords: Ultralytics, HUB, Utilities, YOLO, smart_request, request_with_credentials --- # Reference for `ultralytics/hub/utils.py` diff --git a/docs/en/reference/utils/callbacks/hub.md b/docs/en/reference/utils/callbacks/hub.md index c4620b55..d7e24bf2 100644 --- a/docs/en/reference/utils/callbacks/hub.md +++ b/docs/en/reference/utils/callbacks/hub.md @@ -11,6 +11,10 @@ keywords: Ultralytics, callbacks, pretrain, model save, train start, train end,

+## ::: ultralytics.utils.callbacks.hub.on_pretrain_routine_start + +

+ ## ::: ultralytics.utils.callbacks.hub.on_pretrain_routine_end

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 51e1bf47..5be255eb 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.2.39" +__version__ = "8.2.40" import os diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 1ef2e099..6827264b 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -9,7 +9,7 @@ import torch from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir from ultralytics.engine.results import Results -from ultralytics.hub.utils import HUB_WEB_ROOT +from ultralytics.hub import HUB_WEB_ROOT, HUBTrainingSession from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load from ultralytics.utils import ( ARGV, @@ -17,7 +17,6 @@ from ultralytics.utils import ( DEFAULT_CFG_DICT, LOGGER, RANK, - SETTINGS, callbacks, checks, emojis, @@ -76,7 +75,6 @@ class Model(nn.Module): add_callback: Adds a callback function for an event. clear_callback: Clears all callbacks for an event. reset_callbacks: Resets all callbacks to their default functions. - _get_hub_session: Retrieves or creates an Ultralytics HUB session. is_triton_model: Checks if a model is a Triton Server model. is_hub_model: Checks if a model is an Ultralytics HUB model. _reset_ckpt_args: Resets checkpoint arguments when loading a PyTorch model. @@ -136,7 +134,7 @@ class Model(nn.Module): if self.is_hub_model(model): # Fetch model from HUB checks.check_requirements("hub-sdk>=0.0.6") - self.session = self._get_hub_session(model) + self.session = HUBTrainingSession.create_session(model) model = self.session.model_file # Check if Triton Server model @@ -175,14 +173,6 @@ class Model(nn.Module): """ return self.predict(source, stream, **kwargs) - @staticmethod - def _get_hub_session(model: str): - """Creates a session for Hub Training.""" - from ultralytics.hub.session import HUBTrainingSession - - session = HUBTrainingSession(model) - return session if session.client.authenticated else None - @staticmethod def is_triton_model(model: str) -> bool: """Is model a Triton Server URL string, i.e. :////""" @@ -656,19 +646,6 @@ class Model(nn.Module): self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml) self.model = self.trainer.model - if SETTINGS["hub"] is True and not self.session: - # Create a model in HUB - try: - self.session = self._get_hub_session(self.model_name) - if self.session: - self.session.create_model(args) - # Check model was created - if not getattr(self.session.model, "id", None): - self.session = None - except (PermissionError, ModuleNotFoundError): - # Ignore PermissionError and ModuleNotFoundError which indicates hub-sdk not installed - pass - self.trainer.hub_session = self.session # attach optional HUB session self.trainer.train() # Update model and cfg after training diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py index 96dabcd7..daeed889 100644 --- a/ultralytics/hub/__init__.py +++ b/ultralytics/hub/__init__.py @@ -4,9 +4,24 @@ import requests from ultralytics.data.utils import HUBDatasetStats from ultralytics.hub.auth import Auth -from ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX +from ultralytics.hub.session import HUBTrainingSession +from ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX, events from ultralytics.utils import LOGGER, SETTINGS, checks +__all__ = ( + "PREFIX", + "HUB_WEB_ROOT", + "HUBTrainingSession", + "login", + "logout", + "reset_model", + "export_fmts_hub", + "export_model", + "get_export", + "check_dataset", + "events", +) + def login(api_key: str = None, save=True) -> bool: """ diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py index e820ce0a..4de4fec3 100644 --- a/ultralytics/hub/session.py +++ b/ultralytics/hub/session.py @@ -19,16 +19,12 @@ class HUBTrainingSession: HUB training session for Ultralytics HUB YOLO models. Handles model initialization, heartbeats, and checkpointing. Attributes: - agent_id (str): Identifier for the instance communicating with the server. model_id (str): Identifier for the YOLO model being trained. model_url (str): URL for the model in Ultralytics HUB. - api_url (str): API URL for the model in Ultralytics HUB. - auth_header (dict): Authentication header for the Ultralytics HUB API requests. rate_limits (dict): Rate limits for different API calls (in seconds). timers (dict): Timers for rate limiting. metrics_queue (dict): Queue for the model's metrics. model (dict): Model data fetched from Ultralytics HUB. - alive (bool): Indicates if the heartbeat loop is active. """ def __init__(self, identifier): @@ -46,14 +42,12 @@ class HUBTrainingSession: """ from hub_sdk import HUBClient - self.rate_limits = { - "metrics": 3.0, - "ckpt": 900.0, - "heartbeat": 300.0, - } # rate limits (seconds) + self.rate_limits = {"metrics": 3, "ckpt": 900, "heartbeat": 300} # rate limits (seconds) self.metrics_queue = {} # holds metrics for each epoch until upload self.metrics_upload_failed_queue = {} # holds metrics for each epoch if upload failed self.timers = {} # holds timers in ultralytics/utils/callbacks/hub.py + self.model = None + self.model_url = None # Parse input api_key, model_id, self.filename = self._parse_identifier(identifier) @@ -65,10 +59,26 @@ class HUBTrainingSession: # Initialize client self.client = HUBClient(credentials) - if model_id: - self.load_model(model_id) # load existing model - else: - self.model = self.client.model() # load empty model + # Load models if authenticated + if self.client.authenticated: + if model_id: + self.load_model(model_id) # load existing model + else: + self.model = self.client.model() # load empty model + + @classmethod + def create_session(cls, identifier, args=None): + """Class method to create an authenticated HUBTrainingSession or return None.""" + try: + session = cls(identifier) + assert session.client.authenticated, "HUB not authenticated" + if args: + session.create_model(args) + assert session.model.id, "HUB model not loaded correctly" + return session + # PermissionError and ModuleNotFoundError indicate hub-sdk not installed + except (PermissionError, ModuleNotFoundError, AssertionError): + return None def load_model(self, model_id): """Loads an existing model from Ultralytics HUB using the provided model identifier.""" @@ -92,14 +102,12 @@ class HUBTrainingSession: "epochs": model_args.get("epochs", 300), "imageSize": model_args.get("imgsz", 640), "patience": model_args.get("patience", 100), - "device": model_args.get("device", ""), - "cache": model_args.get("cache", "ram"), + "device": str(model_args.get("device", "")), # convert None to string + "cache": str(model_args.get("cache", "ram")), # convert True, False, None to string }, "dataset": {"name": model_args.get("data")}, "lineage": { - "architecture": { - "name": self.filename.replace(".pt", "").replace(".yaml", ""), - }, + "architecture": {"name": self.filename.replace(".pt", "").replace(".yaml", "")}, "parent": {}, }, "meta": {"name": self.filename}, @@ -113,7 +121,7 @@ class HUBTrainingSession: # Model could not be created # TODO: improve error handling if not self.model.id: - return + return None self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}" @@ -122,7 +130,8 @@ class HUBTrainingSession: LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀") - def _parse_identifier(self, identifier): + @staticmethod + def _parse_identifier(identifier): """ Parses the given identifier to determine the type of identifier and extract relevant components. @@ -213,13 +222,14 @@ class HUBTrainingSession: thread=True, verbose=True, progress_total=None, - stream_reponse=None, + stream_response=None, *args, **kwargs, ): def retry_request(): """Attempts to call `request_func` with retries, timeout, and optional threading.""" t0 = time.time() # Record the start time for the timeout + response = None for i in range(retry + 1): if (time.time() - t0) > timeout: LOGGER.warning(f"{PREFIX}Timeout for request reached. {HELP_MSG}") @@ -233,7 +243,7 @@ class HUBTrainingSession: if progress_total: self._show_upload_progress(progress_total, response) - elif stream_reponse: + elif stream_response: self._iterate_content(response) if HTTPStatus.OK <= response.status_code < HTTPStatus.MULTIPLE_CHOICES: @@ -268,7 +278,8 @@ class HUBTrainingSession: # If running in the main thread, call retry_request directly return retry_request() - def _should_retry(self, status_code): + @staticmethod + def _should_retry(status_code): """Determines if a request should be retried based on the HTTP status code.""" retry_codes = { HTTPStatus.REQUEST_TIMEOUT, @@ -338,12 +349,13 @@ class HUBTrainingSession: timeout=3600, thread=not final, progress_total=progress_total, - stream_reponse=True, + stream_response=True, ) else: LOGGER.warning(f"{PREFIX}WARNING ⚠️ Model upload issue. Missing model {weights}.") - def _show_upload_progress(self, content_length: int, response: requests.Response) -> None: + @staticmethod + def _show_upload_progress(content_length: int, response: requests.Response) -> None: """ Display a progress bar to track the upload progress of a file download. @@ -358,7 +370,8 @@ class HUBTrainingSession: for data in response.iter_content(chunk_size=1024): pbar.update(len(data)) - def _iterate_content(self, response: requests.Response) -> None: + @staticmethod + def _iterate_content(response: requests.Response) -> None: """ Process the streamed HTTP response data. diff --git a/ultralytics/utils/callbacks/hub.py b/ultralytics/utils/callbacks/hub.py index 99fe98bd..f312a610 100644 --- a/ultralytics/utils/callbacks/hub.py +++ b/ultralytics/utils/callbacks/hub.py @@ -3,8 +3,14 @@ import json from time import time -from ultralytics.hub.utils import HUB_WEB_ROOT, PREFIX, events -from ultralytics.utils import LOGGER, SETTINGS +from ultralytics.hub import HUB_WEB_ROOT, PREFIX, HUBTrainingSession, events +from ultralytics.utils import LOGGER, RANK, SETTINGS + + +def on_pretrain_routine_start(trainer): + """Create a remote Ultralytics HUB session to log local model training.""" + if RANK in {-1, 0} and SETTINGS["hub"] is True and not getattr(trainer, "hub_session", None): + trainer.hub_session = HUBTrainingSession.create_session(trainer.args.model, trainer.args) def on_pretrain_routine_end(trainer): @@ -91,6 +97,7 @@ def on_export_start(exporter): callbacks = ( { + "on_pretrain_routine_start": on_pretrain_routine_start, "on_pretrain_routine_end": on_pretrain_routine_end, "on_fit_epoch_end": on_fit_epoch_end, "on_model_save": on_model_save,