ultralytics 8.0.198 MLflow fix, tests and Docs page (#5357)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

This commit is contained in:
parent 12e3eef844
commit 5b3c4cfc0e

11 changed files with 228 additions and 65 deletions
@@ -1,64 +1,104 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+MLflow Logging for Ultralytics YOLO.
+
+This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
+For setting up, a tracking URI should be specified. The logging can be customized using environment variables.
+
+Commands:
+    1. To set a project name:
+        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument
+
+    2. To set a run name:
+        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument
+
+    3. To start a local MLflow server:
+        mlflow server --backend-store-uri runs/mlflow
+        It will by default start a local server at http://127.0.0.1:5000.
+        To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.
+
+    4. To kill all running MLflow server instances:
+        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
+"""

-from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
+from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr

 try:
-    assert not TESTS_RUNNING  # do not log pytest
+    import os
+
+    assert not TESTS_RUNNING or 'test_mlflow' in os.environ.get('PYTEST_CURRENT_TEST', '')  # do not log pytest
     assert SETTINGS['mlflow'] is True  # verify integration is enabled
     import mlflow

     assert hasattr(mlflow, '__version__')  # verify package is not directory
-    PREFIX = colorstr('MLFlow:')
-    import os
-    import re
+    from pathlib import Path
+
+    PREFIX = colorstr('MLflow: ')
+
 except (ImportError, AssertionError):
     mlflow = None


 def on_pretrain_routine_end(trainer):
-    """Logs training parameters to MLflow."""
-    global mlflow, run, experiment_name
-
-    if os.environ.get('MLFLOW_TRACKING_URI') is None:
-        mlflow = None
-
-    if mlflow:
-        mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
-        LOGGER.debug(f'{PREFIX} tracking uri: {mlflow_location}')
-        mlflow.set_tracking_uri(mlflow_location)
-        experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
-        run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
-        experiment = mlflow.set_experiment(experiment_name)  # change since mlflow does this now by default
-
-        mlflow.autolog()
-        prefix = colorstr('MLFlow: ')
-        try:
-            run, active_run = mlflow, mlflow.active_run()
-            if not active_run:
-                active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
-            LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
-            run.log_params(trainer.args)
-        except Exception as err:
-            LOGGER.error(f'{prefix}Failing init - {repr(err)}')
-            LOGGER.warning(f'{prefix}Continuing without Mlflow')
+    """
+    Log training parameters to MLflow at the end of the pretraining routine.
+
+    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
+    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
+    from the trainer.
+
+    Args:
+        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
+
+    Global:
+        mlflow: The imported mlflow module to use for logging.
+
+    Environment Variables:
+        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
+        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
+        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
+    """
+    global mlflow
+
+    uri = os.environ.get('MLFLOW_TRACKING_URI') or str(RUNS_DIR / 'mlflow')
+    LOGGER.debug(f'{PREFIX} tracking uri: {uri}')
+    mlflow.set_tracking_uri(uri)
+
+    # Set experiment and run names
+    experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
+    run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
+    mlflow.set_experiment(experiment_name)
+
+    mlflow.autolog()
+    try:
+        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
+        LOGGER.info(f'{PREFIX}logging run_id({active_run.info.run_id}) to {uri}')
+        if Path(uri).is_dir():
+            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
+        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
+        mlflow.log_params(dict(trainer.args))
+    except Exception as e:
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n'
+                       f'{PREFIX}WARNING ⚠️ Not tracking this run')


 def on_fit_epoch_end(trainer):
-    """Logs training metrics to Mlflow."""
+    """Log training metrics at the end of each fit epoch to MLflow."""
     if mlflow:
-        metrics_dict = {f"{re.sub('[()]', '', k)}": float(v) for k, v in trainer.metrics.items()}
-        run.log_metrics(metrics=metrics_dict, step=trainer.epoch)
+        sanitized_metrics = {k.replace('(', '').replace(')', ''): float(v) for k, v in trainer.metrics.items()}
+        mlflow.log_metrics(metrics=sanitized_metrics, step=trainer.epoch)


 def on_train_end(trainer):
-    """Called at end of train loop to log model artifact info."""
+    """Log model artifacts at the end of the training."""
     if mlflow:
-        run.log_artifact(trainer.last)
-        run.log_artifact(trainer.best)
-        run.log_artifact(trainer.save_dir)
+        mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
+        for f in trainer.save_dir.glob('*'):  # log all other files in save_dir
+            if f.suffix in {'.png', '.jpg', '.csv', '.pt', '.yaml'}:
+                mlflow.log_artifact(str(f))
+
         mlflow.end_run()
-        LOGGER.debug(f'{PREFIX} ending run')
+        LOGGER.info(f'{PREFIX}results logged to {mlflow.get_tracking_uri()}\n'
+                    f"{PREFIX}disable with 'yolo settings mlflow=False'")


 callbacks = {
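For context, the workflow described by the new module docstring can be exercised end to end roughly as follows. This is a minimal sketch rather than part of the commit: it assumes the mlflow package is installed and the integration is enabled in the Ultralytics settings (the log messages above point to 'yolo settings mlflow=False' for disabling it, so 'yolo settings mlflow=True' enables it), the experiment and run names are made-up examples, and the training call uses the standard yolov8n.pt weights with the small coco8.yaml sample dataset.

import os

from ultralytics import YOLO

# Optional overrides read by on_pretrain_routine_end(); if unset, the tracking
# URI falls back to RUNS_DIR / 'mlflow' and the experiment name falls back to
# trainer.args.project (or '/Shared/YOLOv8').
os.environ['MLFLOW_TRACKING_URI'] = 'runs/mlflow'     # local file store, as in the docstring
os.environ['MLFLOW_EXPERIMENT_NAME'] = 'yolov8-demo'  # hypothetical experiment name
os.environ['MLFLOW_RUN'] = 'baseline'                 # hypothetical run name

# Any training run fires the callbacks: parameters are logged at the end of the
# pretrain routine, metrics at the end of each fit epoch, and the weights
# directory plus *.png/*.jpg/*.csv/*.pt/*.yaml files at the end of training.
model = YOLO('yolov8n.pt')
model.train(data='coco8.yaml', epochs=3, imgsz=640)

The logged runs can then be browsed by starting the server command from the docstring, mlflow server --backend-store-uri runs/mlflow, and opening http://127.0.0.1:5000. The remaining hunks in this commit touch the Weights & Biases callback, where the axis-title keyword arguments of the _custom_table helper are renamed: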
@@ -19,7 +19,7 @@ except (ImportError, AssertionError):
     wb = None


-def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'):
+def _custom_table(x, y, classes, title='Precision Recall Curve', x_title='Recall', y_title='Precision'):
     """
     Create and log a custom metric visualization to wandb.plot.pr_curve.

@@ -39,7 +39,7 @@ def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='R
     """
     df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
     fields = {'x': 'x', 'y': 'y', 'class': 'class'}
-    string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+    string_fields = {'title': title, 'x-axis-title': x_title, 'y-axis-title': y_title}
     return wb.plot_table('wandb/area-under-curve/v0',
                          wb.Table(dataframe=df),
                          fields=fields,
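Since only the keyword names change here (x_axis_title/y_axis_title become x_title/y_title), callers simply pass the axis labels under the new names. A hedged sketch of such a call, with made-up precision/recall values and assuming an active W&B run to log the returned table against:

# Hypothetical inputs; in the callback these values would come from the
# model's precision-recall curve.
recall = [0.0, 0.5, 1.0]
precision = [1.0, 0.8, 0.6]
classes = ['all', 'all', 'all']

table = _custom_table(recall, precision, classes,
                      title='Precision Recall Curve', x_title='Recall', y_title='Precision')
wb.run.log({'precision_recall': table})  # assumes wb.run is an active wandb run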