ultralytics 8.0.198 MLflow fix, tests and Docs page (#5357)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

This commit is contained in:
parent 12e3eef844
commit 5b3c4cfc0e

11 changed files with 228 additions and 65 deletions
@@ -1,64 +1,104 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+MLflow Logging for Ultralytics YOLO.
+
+This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
+For setting up, a tracking URI should be specified. The logging can be customized using environment variables.
+
+Commands:
+    1. To set a project name:
+        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument
+
+    2. To set a run name:
+        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument
+
+    3. To start a local MLflow server:
+        mlflow server --backend-store-uri runs/mlflow
+        It will by default start a local server at http://127.0.0.1:5000.
+        To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.
+
+    4. To kill all running MLflow server instances:
+        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
+"""

-from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
+from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr

 try:
-    assert not TESTS_RUNNING  # do not log pytest
+    import os
+
+    assert not TESTS_RUNNING or 'test_mlflow' in os.environ.get('PYTEST_CURRENT_TEST', '')  # do not log pytest
     assert SETTINGS['mlflow'] is True  # verify integration is enabled
     import mlflow

     assert hasattr(mlflow, '__version__')  # verify package is not directory
-    PREFIX = colorstr('MLFlow:')
-    import os
-    import re
+    from pathlib import Path
+
+    PREFIX = colorstr('MLflow: ')
+
 except (ImportError, AssertionError):
     mlflow = None


 def on_pretrain_routine_end(trainer):
-    """Logs training parameters to MLflow."""
-    global mlflow, run, experiment_name
-
-    if os.environ.get('MLFLOW_TRACKING_URI') is None:
-        mlflow = None
-
-    if mlflow:
-        mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
-        LOGGER.debug(f'{PREFIX} tracking uri: {mlflow_location}')
-        mlflow.set_tracking_uri(mlflow_location)
-        experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
-        run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
-        experiment = mlflow.set_experiment(experiment_name)  # change since mlflow does this now by default
-
-        mlflow.autolog()
-        prefix = colorstr('MLFlow: ')
-        try:
-            run, active_run = mlflow, mlflow.active_run()
-            if not active_run:
-                active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
-            LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
-            run.log_params(trainer.args)
-        except Exception as err:
-            LOGGER.error(f'{prefix}Failing init - {repr(err)}')
-            LOGGER.warning(f'{prefix}Continuing without Mlflow')
+    """
+    Log training parameters to MLflow at the end of the pretraining routine.
+
+    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
+    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
+    from the trainer.
+
+    Args:
+        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
+
+    Global:
+        mlflow: The imported mlflow module to use for logging.
+
+    Environment Variables:
+        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
+        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
+        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
+    """
+    global mlflow
+
+    uri = os.environ.get('MLFLOW_TRACKING_URI') or str(RUNS_DIR / 'mlflow')
+    LOGGER.debug(f'{PREFIX} tracking uri: {uri}')
+    mlflow.set_tracking_uri(uri)
+
+    # Set experiment and run names
+    experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
+    run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
+    mlflow.set_experiment(experiment_name)
+
+    mlflow.autolog()
+    try:
+        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
+        LOGGER.info(f'{PREFIX}logging run_id({active_run.info.run_id}) to {uri}')
+        if Path(uri).is_dir():
+            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
+        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
+        mlflow.log_params(dict(trainer.args))
+    except Exception as e:
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n'
+                       f'{PREFIX}WARNING ⚠️ Not tracking this run')


 def on_fit_epoch_end(trainer):
-    """Logs training metrics to Mlflow."""
+    """Log training metrics at the end of each fit epoch to MLflow."""
     if mlflow:
-        metrics_dict = {f"{re.sub('[()]', '', k)}": float(v) for k, v in trainer.metrics.items()}
-        run.log_metrics(metrics=metrics_dict, step=trainer.epoch)
+        sanitized_metrics = {k.replace('(', '').replace(')', ''): float(v) for k, v in trainer.metrics.items()}
+        mlflow.log_metrics(metrics=sanitized_metrics, step=trainer.epoch)


 def on_train_end(trainer):
-    """Called at end of train loop to log model artifact info."""
+    """Log model artifacts at the end of the training."""
     if mlflow:
-        run.log_artifact(trainer.last)
-        run.log_artifact(trainer.best)
-        run.log_artifact(trainer.save_dir)
+        mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
+        for f in trainer.save_dir.glob('*'):  # log all other files in save_dir
+            if f.suffix in {'.png', '.jpg', '.csv', '.pt', '.yaml'}:
+                mlflow.log_artifact(str(f))
+
         mlflow.end_run()
-        LOGGER.debug(f'{PREFIX} ending run')
+        LOGGER.info(f'{PREFIX}results logged to {mlflow.get_tracking_uri()}\n'
+                    f"{PREFIX}disable with 'yolo settings mlflow=False'")


 callbacks = {
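For context, the workflow described by the new module docstring can be exercised end to end roughly as follows. This is a minimal sketch rather than part of the commit: it assumes the mlflow package is installed and the integration is enabled in the Ultralytics settings (the log messages above point to 'yolo settings mlflow=False' for disabling it, so 'yolo settings mlflow=True' enables it), the experiment and run names are made-up examples, and the training call uses the standard yolov8n.pt weights with the small coco8.yaml sample dataset.

import os

from ultralytics import YOLO

# Optional overrides read by on_pretrain_routine_end(); if unset, the tracking
# URI falls back to RUNS_DIR / 'mlflow' and the experiment name falls back to
# trainer.args.project (or '/Shared/YOLOv8').
os.environ['MLFLOW_TRACKING_URI'] = 'runs/mlflow'     # local file store, as in the docstring
os.environ['MLFLOW_EXPERIMENT_NAME'] = 'yolov8-demo'  # hypothetical experiment name
os.environ['MLFLOW_RUN'] = 'baseline'                 # hypothetical run name

# Any training run fires the callbacks: parameters are logged at the end of the
# pretrain routine, metrics at the end of each fit epoch, and the weights
# directory plus *.png/*.jpg/*.csv/*.pt/*.yaml files at the end of training.
model = YOLO('yolov8n.pt')
model.train(data='coco8.yaml', epochs=3, imgsz=640)

The logged runs can then be browsed by starting the server command from the docstring, mlflow server --backend-store-uri runs/mlflow, and opening http://127.0.0.1:5000. The remaining hunks in this commit touch the Weights & Biases callback, where the axis-title keyword arguments of the _custom_table helper are renamed: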
@@ -19,7 +19,7 @@ except (ImportError, AssertionError):
     wb = None


-def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'):
+def _custom_table(x, y, classes, title='Precision Recall Curve', x_title='Recall', y_title='Precision'):
     """
     Create and log a custom metric visualization to wandb.plot.pr_curve.

@@ -39,7 +39,7 @@ def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='R
     """
     df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
     fields = {'x': 'x', 'y': 'y', 'class': 'class'}
-    string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+    string_fields = {'title': title, 'x-axis-title': x_title, 'y-axis-title': y_title}
     return wb.plot_table('wandb/area-under-curve/v0',
                          wb.Table(dataframe=df),
                          fields=fields,
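Since only the keyword names change here (x_axis_title/y_axis_title become x_title/y_title), callers simply pass the axis labels under the new names. A hedged sketch of such a call, with made-up precision/recall values and assuming an active W&B run to log the returned table against:

# Hypothetical inputs; in the callback these values would come from the
# model's precision-recall curve.
recall = [0.0, 0.5, 1.0]
precision = [1.0, 0.8, 0.6]
classes = ['all', 'all', 'all']

table = _custom_table(recall, precision, classes,
                      title='Precision Recall Curve', x_title='Recall', y_title='Precision')
wb.run.log({'precision_recall': table})  # assumes wb.run is an active wandb run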