ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>
This commit is contained in:
Glenn Jocher 2024-01-10 03:16:08 +01:00 committed by GitHub
parent e795277391
commit fe27db2f6e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
139 changed files with 6870 additions and 5125 deletions

File diff suppressed because it is too large Load diff

View file

@ -53,7 +53,7 @@ class Model(nn.Module):
list(ultralytics.engine.results.Results): The prediction results.
"""
def __init__(self, model: Union[str, Path] = 'yolov8n.pt', task=None) -> None:
def __init__(self, model: Union[str, Path] = "yolov8n.pt", task=None) -> None:
"""
Initializes the YOLO model.
@ -89,7 +89,7 @@ class Model(nn.Module):
# Load or create new YOLO model
model = checks.check_model_file_from_stem(model) # add suffix, i.e. yolov8n -> yolov8n.pt
if Path(model).suffix in ('.yaml', '.yml'):
if Path(model).suffix in (".yaml", ".yml"):
self._new(model, task)
else:
self._load(model, task)
@ -112,16 +112,20 @@ class Model(nn.Module):
def is_triton_model(model):
"""Is model a Triton Server URL string, i.e. <scheme>://<netloc>/<endpoint>/<task_name>"""
from urllib.parse import urlsplit
url = urlsplit(model)
return url.netloc and url.path and url.scheme in {'http', 'grpc'}
return url.netloc and url.path and url.scheme in {"http", "grpc"}
@staticmethod
def is_hub_model(model):
"""Check if the provided model is a HUB model."""
return any((
model.startswith(f'{HUB_WEB_ROOT}/models/'), # i.e. https://hub.ultralytics.com/models/MODEL_ID
[len(x) for x in model.split('_')] == [42, 20], # APIKEY_MODELID
len(model) == 20 and not Path(model).exists() and all(x not in model for x in './\\'))) # MODELID
return any(
(
model.startswith(f"{HUB_WEB_ROOT}/models/"), # i.e. https://hub.ultralytics.com/models/MODEL_ID
[len(x) for x in model.split("_")] == [42, 20], # APIKEY_MODELID
len(model) == 20 and not Path(model).exists() and all(x not in model for x in "./\\"),
)
) # MODELID
def _new(self, cfg: str, task=None, model=None, verbose=True):
"""
@ -136,9 +140,9 @@ class Model(nn.Module):
cfg_dict = yaml_model_load(cfg)
self.cfg = cfg
self.task = task or guess_model_task(cfg_dict)
self.model = (model or self._smart_load('model'))(cfg_dict, verbose=verbose and RANK == -1) # build model
self.overrides['model'] = self.cfg
self.overrides['task'] = self.task
self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1) # build model
self.overrides["model"] = self.cfg
self.overrides["task"] = self.task
# Below added to allow export from YAMLs
self.model.args = {**DEFAULT_CFG_DICT, **self.overrides} # combine default and model args (prefer model args)
@ -153,9 +157,9 @@ class Model(nn.Module):
task (str | None): model task
"""
suffix = Path(weights).suffix
if suffix == '.pt':
if suffix == ".pt":
self.model, self.ckpt = attempt_load_one_weight(weights)
self.task = self.model.args['task']
self.task = self.model.args["task"]
self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
self.ckpt_path = self.model.pt_path
else:
@ -163,12 +167,12 @@ class Model(nn.Module):
self.model, self.ckpt = weights, None
self.task = task or guess_model_task(weights)
self.ckpt_path = weights
self.overrides['model'] = weights
self.overrides['task'] = self.task
self.overrides["model"] = weights
self.overrides["task"] = self.task
def _check_is_pytorch_model(self):
"""Raises TypeError is model is not a PyTorch model."""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt"
pt_module = isinstance(self.model, nn.Module)
if not (pt_module or pt_str):
raise TypeError(
@ -176,19 +180,20 @@ class Model(nn.Module):
f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'"
)
def reset_weights(self):
"""Resets the model modules parameters to randomly initialized values, losing all training information."""
self._check_is_pytorch_model()
for m in self.model.modules():
if hasattr(m, 'reset_parameters'):
if hasattr(m, "reset_parameters"):
m.reset_parameters()
for p in self.model.parameters():
p.requires_grad = True
return self
def load(self, weights='yolov8n.pt'):
def load(self, weights="yolov8n.pt"):
"""Transfers parameters with matching names and shapes from 'weights' to model."""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
@ -226,8 +231,8 @@ class Model(nn.Module):
Returns:
(List[torch.Tensor]): A list of image embeddings.
"""
if not kwargs.get('embed'):
kwargs['embed'] = [len(self.model.model) - 2] # embed second-to-last layer if no indices passed
if not kwargs.get("embed"):
kwargs["embed"] = [len(self.model.model) - 2] # embed second-to-last layer if no indices passed
return self.predict(source, stream, **kwargs)
def predict(self, source=None, stream=False, predictor=None, **kwargs):
@ -249,21 +254,22 @@ class Model(nn.Module):
source = ASSETS
LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using 'source={source}'.")
is_cli = (sys.argv[0].endswith('yolo') or sys.argv[0].endswith('ultralytics')) and any(
x in sys.argv for x in ('predict', 'track', 'mode=predict', 'mode=track'))
is_cli = (sys.argv[0].endswith("yolo") or sys.argv[0].endswith("ultralytics")) and any(
x in sys.argv for x in ("predict", "track", "mode=predict", "mode=track")
)
custom = {'conf': 0.25, 'save': is_cli} # method defaults
args = {**self.overrides, **custom, **kwargs, 'mode': 'predict'} # highest priority args on the right
prompts = args.pop('prompts', None) # for SAM-type models
custom = {"conf": 0.25, "save": is_cli} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "predict"} # highest priority args on the right
prompts = args.pop("prompts", None) # for SAM-type models
if not self.predictor:
self.predictor = predictor or self._smart_load('predictor')(overrides=args, _callbacks=self.callbacks)
self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks)
self.predictor.setup_model(model=self.model, verbose=is_cli)
else: # only update args if predictor is already setup
self.predictor.args = get_cfg(self.predictor.args, args)
if 'project' in args or 'name' in args:
if "project" in args or "name" in args:
self.predictor.save_dir = get_save_dir(self.predictor.args)
if prompts and hasattr(self.predictor, 'set_prompts'): # for SAM-type models
if prompts and hasattr(self.predictor, "set_prompts"): # for SAM-type models
self.predictor.set_prompts(prompts)
return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)
@ -280,11 +286,12 @@ class Model(nn.Module):
Returns:
(List[ultralytics.engine.results.Results]): The tracking results.
"""
if not hasattr(self.predictor, 'trackers'):
if not hasattr(self.predictor, "trackers"):
from ultralytics.trackers import register_tracker
register_tracker(self, persist)
kwargs['conf'] = kwargs.get('conf') or 0.1 # ByteTrack-based method needs low confidence predictions as input
kwargs['mode'] = 'track'
kwargs["conf"] = kwargs.get("conf") or 0.1 # ByteTrack-based method needs low confidence predictions as input
kwargs["mode"] = "track"
return self.predict(source=source, stream=stream, **kwargs)
def val(self, validator=None, **kwargs):
@ -295,10 +302,10 @@ class Model(nn.Module):
validator (BaseValidator): Customized validator.
**kwargs : Any other args accepted by the validators. To see all args check 'configuration' section in docs
"""
custom = {'rect': True} # method defaults
args = {**self.overrides, **custom, **kwargs, 'mode': 'val'} # highest priority args on the right
custom = {"rect": True} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "val"} # highest priority args on the right
validator = (validator or self._smart_load('validator'))(args=args, _callbacks=self.callbacks)
validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
validator(model=self.model)
self.metrics = validator.metrics
return validator.metrics
@ -313,16 +320,17 @@ class Model(nn.Module):
self._check_is_pytorch_model()
from ultralytics.utils.benchmarks import benchmark
custom = {'verbose': False} # method defaults
args = {**DEFAULT_CFG_DICT, **self.model.args, **custom, **kwargs, 'mode': 'benchmark'}
custom = {"verbose": False} # method defaults
args = {**DEFAULT_CFG_DICT, **self.model.args, **custom, **kwargs, "mode": "benchmark"}
return benchmark(
model=self,
data=kwargs.get('data'), # if no 'data' argument passed set data=None for default datasets
imgsz=args['imgsz'],
half=args['half'],
int8=args['int8'],
device=args['device'],
verbose=kwargs.get('verbose'))
data=kwargs.get("data"), # if no 'data' argument passed set data=None for default datasets
imgsz=args["imgsz"],
half=args["half"],
int8=args["int8"],
device=args["device"],
verbose=kwargs.get("verbose"),
)
def export(self, **kwargs):
"""
@ -334,8 +342,8 @@ class Model(nn.Module):
self._check_is_pytorch_model()
from .exporter import Exporter
custom = {'imgsz': self.model.args['imgsz'], 'batch': 1, 'data': None, 'verbose': False} # method defaults
args = {**self.overrides, **custom, **kwargs, 'mode': 'export'} # highest priority args on the right
custom = {"imgsz": self.model.args["imgsz"], "batch": 1, "data": None, "verbose": False} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "export"} # highest priority args on the right
return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model)
def train(self, trainer=None, **kwargs):
@ -347,32 +355,32 @@ class Model(nn.Module):
**kwargs (Any): Any number of arguments representing the training configuration.
"""
self._check_is_pytorch_model()
if hasattr(self.session, 'model') and self.session.model.id: # Ultralytics HUB session with loaded model
if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model
if any(kwargs):
LOGGER.warning('WARNING ⚠️ using HUB training arguments, ignoring local training arguments.')
LOGGER.warning("WARNING ⚠️ using HUB training arguments, ignoring local training arguments.")
kwargs = self.session.train_args # overwrite kwargs
checks.check_pip_update_available()
overrides = yaml_load(checks.check_yaml(kwargs['cfg'])) if kwargs.get('cfg') else self.overrides
custom = {'data': DEFAULT_CFG_DICT['data'] or TASK2DATA[self.task]} # method defaults
args = {**overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right
if args.get('resume'):
args['resume'] = self.ckpt_path
overrides = yaml_load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
custom = {"data": DEFAULT_CFG_DICT["data"] or TASK2DATA[self.task]} # method defaults
args = {**overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
if args.get("resume"):
args["resume"] = self.ckpt_path
self.trainer = (trainer or self._smart_load('trainer'))(overrides=args, _callbacks=self.callbacks)
if not args.get('resume'): # manually set model only if not resuming
self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
if not args.get("resume"): # manually set model only if not resuming
self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
self.model = self.trainer.model
if SETTINGS['hub'] is True and not self.session:
if SETTINGS["hub"] is True and not self.session:
# Create a model in HUB
try:
self.session = self._get_hub_session(self.model_name)
if self.session:
self.session.create_model(args)
# Check model was created
if not getattr(self.session.model, 'id', None):
if not getattr(self.session.model, "id", None):
self.session = None
except PermissionError:
# Ignore permission error
@ -385,7 +393,7 @@ class Model(nn.Module):
ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
self.model, _ = attempt_load_one_weight(ckpt)
self.overrides = self.model.args
self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP
self.metrics = getattr(self.trainer.validator, "metrics", None) # TODO: no metrics returned by DDP
return self.metrics
def tune(self, use_ray=False, iterations=10, *args, **kwargs):
@ -398,12 +406,13 @@ class Model(nn.Module):
self._check_is_pytorch_model()
if use_ray:
from ultralytics.utils.tuner import run_ray_tune
return run_ray_tune(self, max_samples=iterations, *args, **kwargs)
else:
from .tuner import Tuner
custom = {} # method defaults
args = {**self.overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right
args = {**self.overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)
def _apply(self, fn):
@ -411,13 +420,13 @@ class Model(nn.Module):
self._check_is_pytorch_model()
self = super()._apply(fn) # noqa
self.predictor = None # reset predictor as device may have changed
self.overrides['device'] = self.device # was str(self.device) i.e. device(type='cuda', index=0) -> 'cuda:0'
self.overrides["device"] = self.device # was str(self.device) i.e. device(type='cuda', index=0) -> 'cuda:0'
return self
@property
def names(self):
"""Returns class names of the loaded model."""
return self.model.names if hasattr(self.model, 'names') else None
return self.model.names if hasattr(self.model, "names") else None
@property
def device(self):
@ -427,7 +436,7 @@ class Model(nn.Module):
@property
def transforms(self):
"""Returns transform of the loaded model."""
return self.model.transforms if hasattr(self.model, 'transforms') else None
return self.model.transforms if hasattr(self.model, "transforms") else None
def add_callback(self, event: str, func):
"""Add a callback."""
@ -445,7 +454,7 @@ class Model(nn.Module):
@staticmethod
def _reset_ckpt_args(args):
"""Reset arguments when loading a PyTorch model."""
include = {'imgsz', 'data', 'task', 'single_cls'} # only remember these arguments when loading a PyTorch model
include = {"imgsz", "data", "task", "single_cls"} # only remember these arguments when loading a PyTorch model
return {k: v for k, v in args.items() if k in include}
# def __getattr__(self, attr):
@ -461,7 +470,8 @@ class Model(nn.Module):
name = self.__class__.__name__
mode = inspect.stack()[1][3] # get the function name.
raise NotImplementedError(
emojis(f"WARNING ⚠️ '{name}' model does not support '{mode}' mode for '{self.task}' task yet.")) from e
emojis(f"WARNING ⚠️ '{name}' model does not support '{mode}' mode for '{self.task}' task yet.")
) from e
@property
def task_map(self):
@ -471,4 +481,4 @@ class Model(nn.Module):
Returns:
task_map (dict): The map of model task to mode classes.
"""
raise NotImplementedError('Please provide task map for your model!')
raise NotImplementedError("Please provide task map for your model!")

View file

@ -132,8 +132,11 @@ class BasePredictor:
def inference(self, im, *args, **kwargs):
"""Runs inference on a given image using the specified model and arguments."""
visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem,
mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
visualize = (
increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True)
if self.args.visualize and (not self.source_type.tensor)
else False
)
return self.model(im, augment=self.args.augment, visualize=visualize, embed=self.args.embed, *args, **kwargs)
def pre_transform(self, im):
@ -153,35 +156,38 @@ class BasePredictor:
def write_results(self, idx, results, batch):
"""Write inference results to a file or directory."""
p, im, _ = batch
log_string = ''
log_string = ""
if len(im.shape) == 3:
im = im[None] # expand for batch dim
if self.source_type.webcam or self.source_type.from_img or self.source_type.tensor: # batch_size >= 1
log_string += f'{idx}: '
log_string += f"{idx}: "
frame = self.dataset.count
else:
frame = getattr(self.dataset, 'frame', 0)
frame = getattr(self.dataset, "frame", 0)
self.data_path = p
self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
log_string += '%gx%g ' % im.shape[2:] # print string
self.txt_path = str(self.save_dir / "labels" / p.stem) + ("" if self.dataset.mode == "image" else f"_{frame}")
log_string += "%gx%g " % im.shape[2:] # print string
result = results[idx]
log_string += result.verbose()
if self.args.save or self.args.show: # Add bbox to image
plot_args = {
'line_width': self.args.line_width,
'boxes': self.args.show_boxes,
'conf': self.args.show_conf,
'labels': self.args.show_labels}
"line_width": self.args.line_width,
"boxes": self.args.show_boxes,
"conf": self.args.show_conf,
"labels": self.args.show_labels,
}
if not self.args.retina_masks:
plot_args['im_gpu'] = im[idx]
plot_args["im_gpu"] = im[idx]
self.plotted_img = result.plot(**plot_args)
# Write
if self.args.save_txt:
result.save_txt(f'{self.txt_path}.txt', save_conf=self.args.save_conf)
result.save_txt(f"{self.txt_path}.txt", save_conf=self.args.save_conf)
if self.args.save_crop:
result.save_crop(save_dir=self.save_dir / 'crops',
file_name=self.data_path.stem + ('' if self.dataset.mode == 'image' else f'_{frame}'))
result.save_crop(
save_dir=self.save_dir / "crops",
file_name=self.data_path.stem + ("" if self.dataset.mode == "image" else f"_{frame}"),
)
return log_string
@ -210,17 +216,24 @@ class BasePredictor:
def setup_source(self, source):
"""Sets up source and inference mode."""
self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2) # check image size
self.transforms = getattr(
self.model.model, 'transforms', classify_transforms(
self.imgsz[0], crop_fraction=self.args.crop_fraction)) if self.args.task == 'classify' else None
self.dataset = load_inference_source(source=source,
imgsz=self.imgsz,
vid_stride=self.args.vid_stride,
buffer=self.args.stream_buffer)
self.transforms = (
getattr(
self.model.model,
"transforms",
classify_transforms(self.imgsz[0], crop_fraction=self.args.crop_fraction),
)
if self.args.task == "classify"
else None
)
self.dataset = load_inference_source(
source=source, imgsz=self.imgsz, vid_stride=self.args.vid_stride, buffer=self.args.stream_buffer
)
self.source_type = self.dataset.source_type
if not getattr(self, 'stream', True) and (self.dataset.mode == 'stream' or # streams
len(self.dataset) > 1000 or # images
any(getattr(self.dataset, 'video_flag', [False]))): # videos
if not getattr(self, "stream", True) and (
self.dataset.mode == "stream" # streams
or len(self.dataset) > 1000 # images
or any(getattr(self.dataset, "video_flag", [False]))
): # videos
LOGGER.warning(STREAM_WARNING)
self.vid_path = [None] * self.dataset.bs
self.vid_writer = [None] * self.dataset.bs
@ -230,7 +243,7 @@ class BasePredictor:
def stream_inference(self, source=None, model=None, *args, **kwargs):
"""Streams real-time inference on camera feed and saves results to file."""
if self.args.verbose:
LOGGER.info('')
LOGGER.info("")
# Setup model
if not self.model:
@ -242,7 +255,7 @@ class BasePredictor:
# Check if save_dir/ label file exists
if self.args.save or self.args.save_txt:
(self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
(self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
# Warmup model
if not self.done_warmup:
@ -250,10 +263,10 @@ class BasePredictor:
self.done_warmup = True
self.seen, self.windows, self.batch, profilers = 0, [], None, (ops.Profile(), ops.Profile(), ops.Profile())
self.run_callbacks('on_predict_start')
self.run_callbacks("on_predict_start")
for batch in self.dataset:
self.run_callbacks('on_predict_batch_start')
self.run_callbacks("on_predict_batch_start")
self.batch = batch
path, im0s, vid_cap, s = batch
@ -272,15 +285,16 @@ class BasePredictor:
with profilers[2]:
self.results = self.postprocess(preds, im, im0s)
self.run_callbacks('on_predict_postprocess_end')
self.run_callbacks("on_predict_postprocess_end")
# Visualize, save, write results
n = len(im0s)
for i in range(n):
self.seen += 1
self.results[i].speed = {
'preprocess': profilers[0].dt * 1E3 / n,
'inference': profilers[1].dt * 1E3 / n,
'postprocess': profilers[2].dt * 1E3 / n}
"preprocess": profilers[0].dt * 1e3 / n,
"inference": profilers[1].dt * 1e3 / n,
"postprocess": profilers[2].dt * 1e3 / n,
}
p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy()
p = Path(p)
@ -293,12 +307,12 @@ class BasePredictor:
if self.args.save and self.plotted_img is not None:
self.save_preds(vid_cap, i, str(self.save_dir / p.name))
self.run_callbacks('on_predict_batch_end')
self.run_callbacks("on_predict_batch_end")
yield from self.results
# Print time (inference-only)
if self.args.verbose:
LOGGER.info(f'{s}{profilers[1].dt * 1E3:.1f}ms')
LOGGER.info(f"{s}{profilers[1].dt * 1E3:.1f}ms")
# Release assets
if isinstance(self.vid_writer[-1], cv2.VideoWriter):
@ -306,25 +320,29 @@ class BasePredictor:
# Print results
if self.args.verbose and self.seen:
t = tuple(x.t / self.seen * 1E3 for x in profilers) # speeds per image
LOGGER.info(f'Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape '
f'{(1, 3, *im.shape[2:])}' % t)
t = tuple(x.t / self.seen * 1e3 for x in profilers) # speeds per image
LOGGER.info(
f"Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape "
f"{(1, 3, *im.shape[2:])}" % t
)
if self.args.save or self.args.save_txt or self.args.save_crop:
nl = len(list(self.save_dir.glob('labels/*.txt'))) # number of labels
s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ''
nl = len(list(self.save_dir.glob("labels/*.txt"))) # number of labels
s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}")
self.run_callbacks('on_predict_end')
self.run_callbacks("on_predict_end")
def setup_model(self, model, verbose=True):
"""Initialize YOLO model with given parameters and set it to evaluation mode."""
self.model = AutoBackend(model or self.args.model,
device=select_device(self.args.device, verbose=verbose),
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half,
fuse=True,
verbose=verbose)
self.model = AutoBackend(
model or self.args.model,
device=select_device(self.args.device, verbose=verbose),
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half,
fuse=True,
verbose=verbose,
)
self.device = self.model.device # update device
self.args.half = self.model.fp16 # update half
@ -333,18 +351,18 @@ class BasePredictor:
def show(self, p):
"""Display an image in a window using OpenCV imshow()."""
im0 = self.plotted_img
if platform.system() == 'Linux' and p not in self.windows:
if platform.system() == "Linux" and p not in self.windows:
self.windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
cv2.waitKey(500 if self.batch[3].startswith('image') else 1) # 1 millisecond
cv2.waitKey(500 if self.batch[3].startswith("image") else 1) # 1 millisecond
def save_preds(self, vid_cap, idx, save_path):
"""Save video predictions as mp4 at specified path."""
im0 = self.plotted_img
# Save imgs
if self.dataset.mode == 'image':
if self.dataset.mode == "image":
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
frames_path = f'{save_path.split(".", 1)[0]}_frames/'
@ -361,15 +379,16 @@ class BasePredictor:
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
suffix, fourcc = ('.mp4', 'avc1') if MACOS else ('.avi', 'WMV2') if WINDOWS else ('.avi', 'MJPG')
self.vid_writer[idx] = cv2.VideoWriter(str(Path(save_path).with_suffix(suffix)),
cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
suffix, fourcc = (".mp4", "avc1") if MACOS else (".avi", "WMV2") if WINDOWS else (".avi", "MJPG")
self.vid_writer[idx] = cv2.VideoWriter(
str(Path(save_path).with_suffix(suffix)), cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
)
# Write video
self.vid_writer[idx].write(im0)
# Write frame
if self.args.save_frames:
cv2.imwrite(f'{frames_path}{self.vid_frame[idx]}.jpg', im0)
cv2.imwrite(f"{frames_path}{self.vid_frame[idx]}.jpg", im0)
self.vid_frame[idx] += 1
def run_callbacks(self, event: str):

View file

@ -98,15 +98,15 @@ class Results(SimpleClass):
self.probs = Probs(probs) if probs is not None else None
self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
self.obb = OBB(obb, self.orig_shape) if obb is not None else None
self.speed = {'preprocess': None, 'inference': None, 'postprocess': None} # milliseconds per image
self.speed = {"preprocess": None, "inference": None, "postprocess": None} # milliseconds per image
self.names = names
self.path = path
self.save_dir = None
self._keys = 'boxes', 'masks', 'probs', 'keypoints', 'obb'
self._keys = "boxes", "masks", "probs", "keypoints", "obb"
def __getitem__(self, idx):
"""Return a Results object for the specified index."""
return self._apply('__getitem__', idx)
return self._apply("__getitem__", idx)
def __len__(self):
"""Return the number of detections in the Results object."""
@ -146,19 +146,19 @@ class Results(SimpleClass):
def cpu(self):
"""Return a copy of the Results object with all tensors on CPU memory."""
return self._apply('cpu')
return self._apply("cpu")
def numpy(self):
"""Return a copy of the Results object with all tensors as numpy arrays."""
return self._apply('numpy')
return self._apply("numpy")
def cuda(self):
"""Return a copy of the Results object with all tensors on GPU memory."""
return self._apply('cuda')
return self._apply("cuda")
def to(self, *args, **kwargs):
"""Return a copy of the Results object with tensors on the specified device and dtype."""
return self._apply('to', *args, **kwargs)
return self._apply("to", *args, **kwargs)
def new(self):
"""Return a new Results object with the same image, path, and names."""
@ -169,7 +169,7 @@ class Results(SimpleClass):
conf=True,
line_width=None,
font_size=None,
font='Arial.ttf',
font="Arial.ttf",
pil=False,
img=None,
im_gpu=None,
@ -229,14 +229,20 @@ class Results(SimpleClass):
font_size,
font,
pil or (pred_probs is not None and show_probs), # Classify tasks default to pil=True
example=names)
example=names,
)
# Plot Segment results
if pred_masks and show_masks:
if im_gpu is None:
img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
im_gpu = torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device).permute(
2, 0, 1).flip(0).contiguous() / 255
im_gpu = (
torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
.permute(2, 0, 1)
.flip(0)
.contiguous()
/ 255
)
idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)
@ -244,14 +250,14 @@ class Results(SimpleClass):
if pred_boxes is not None and show_boxes:
for d in reversed(pred_boxes):
c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ('' if id is None else f'id:{id} ') + names[c]
label = (f'{name} {conf:.2f}' if conf else name) if labels else None
name = ("" if id is None else f"id:{id} ") + names[c]
label = (f"{name} {conf:.2f}" if conf else name) if labels else None
box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)
# Plot Classify results
if pred_probs is not None and show_probs:
text = ',\n'.join(f'{names[j] if names else j} {pred_probs.data[j]:.2f}' for j in pred_probs.top5)
text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
x = round(self.orig_shape[0] * 0.03)
annotator.text([x, x], text, txt_color=(255, 255, 255)) # TODO: allow setting colors
@ -264,11 +270,11 @@ class Results(SimpleClass):
def verbose(self):
"""Return log string for each task."""
log_string = ''
log_string = ""
probs = self.probs
boxes = self.boxes
if len(self) == 0:
return log_string if probs is not None else f'{log_string}(no detections), '
return log_string if probs is not None else f"{log_string}(no detections), "
if probs is not None:
log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
if boxes:
@ -293,7 +299,7 @@ class Results(SimpleClass):
texts = []
if probs is not None:
# Classify
[texts.append(f'{probs.data[j]:.2f} {self.names[j]}') for j in probs.top5]
[texts.append(f"{probs.data[j]:.2f} {self.names[j]}") for j in probs.top5]
elif boxes:
# Detect/segment/pose
for j, d in enumerate(boxes):
@ -304,16 +310,16 @@ class Results(SimpleClass):
line = (c, *seg)
if kpts is not None:
kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
line += (*kpt.reshape(-1).tolist(), )
line += (conf, ) * save_conf + (() if id is None else (id, ))
texts.append(('%g ' * len(line)).rstrip() % line)
line += (*kpt.reshape(-1).tolist(),)
line += (conf,) * save_conf + (() if id is None else (id,))
texts.append(("%g " * len(line)).rstrip() % line)
if texts:
Path(txt_file).parent.mkdir(parents=True, exist_ok=True) # make directory
with open(txt_file, 'a') as f:
f.writelines(text + '\n' for text in texts)
with open(txt_file, "a") as f:
f.writelines(text + "\n" for text in texts)
def save_crop(self, save_dir, file_name=Path('im.jpg')):
def save_crop(self, save_dir, file_name=Path("im.jpg")):
"""
Save cropped predictions to `save_dir/cls/file_name.jpg`.
@ -322,21 +328,23 @@ class Results(SimpleClass):
file_name (str | pathlib.Path): File name.
"""
if self.probs is not None:
LOGGER.warning('WARNING ⚠️ Classify task do not support `save_crop`.')
LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
return
if self.obb is not None:
LOGGER.warning('WARNING ⚠️ OBB task do not support `save_crop`.')
LOGGER.warning("WARNING ⚠️ OBB task do not support `save_crop`.")
return
for d in self.boxes:
save_one_box(d.xyxy,
self.orig_img.copy(),
file=Path(save_dir) / self.names[int(d.cls)] / f'{Path(file_name)}.jpg',
BGR=True)
save_one_box(
d.xyxy,
self.orig_img.copy(),
file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
BGR=True,
)
def tojson(self, normalize=False):
"""Convert the object to JSON format."""
if self.probs is not None:
LOGGER.warning('Warning: Classify task do not support `tojson` yet.')
LOGGER.warning("Warning: Classify task do not support `tojson` yet.")
return
import json
@ -346,19 +354,19 @@ class Results(SimpleClass):
data = self.boxes.data.cpu().tolist()
h, w = self.orig_shape if normalize else (1, 1)
for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id
box = {'x1': row[0] / w, 'y1': row[1] / h, 'x2': row[2] / w, 'y2': row[3] / h}
box = {"x1": row[0] / w, "y1": row[1] / h, "x2": row[2] / w, "y2": row[3] / h}
conf = row[-2]
class_id = int(row[-1])
name = self.names[class_id]
result = {'name': name, 'class': class_id, 'confidence': conf, 'box': box}
result = {"name": name, "class": class_id, "confidence": conf, "box": box}
if self.boxes.is_track:
result['track_id'] = int(row[-3]) # track ID
result["track_id"] = int(row[-3]) # track ID
if self.masks:
x, y = self.masks.xy[i][:, 0], self.masks.xy[i][:, 1] # numpy array
result['segments'] = {'x': (x / w).tolist(), 'y': (y / h).tolist()}
result["segments"] = {"x": (x / w).tolist(), "y": (y / h).tolist()}
if self.keypoints is not None:
x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1) # torch Tensor
result['keypoints'] = {'x': (x / w).tolist(), 'y': (y / h).tolist(), 'visible': visible.tolist()}
result["keypoints"] = {"x": (x / w).tolist(), "y": (y / h).tolist(), "visible": visible.tolist()}
results.append(result)
# Convert detections to JSON
@ -397,7 +405,7 @@ class Boxes(BaseTensor):
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
assert n in (6, 7), f'expected 6 or 7 values but got {n}' # xyxy, track_id, conf, cls
assert n in (6, 7), f"expected 6 or 7 values but got {n}" # xyxy, track_id, conf, cls
super().__init__(boxes, orig_shape)
self.is_track = n == 7
self.orig_shape = orig_shape
@ -474,7 +482,8 @@ class Masks(BaseTensor):
"""Return normalized segments."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data)]
for x in ops.masks2segments(self.data)
]
@property
@lru_cache(maxsize=1)
@ -482,7 +491,8 @@ class Masks(BaseTensor):
"""Return segments in pixel coordinates."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data)]
for x in ops.masks2segments(self.data)
]
class Keypoints(BaseTensor):
@ -610,7 +620,7 @@ class OBB(BaseTensor):
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
assert n in (7, 8), f'expected 7 or 8 values but got {n}' # xywh, rotation, track_id, conf, cls
assert n in (7, 8), f"expected 7 or 8 values but got {n}" # xywh, rotation, track_id, conf, cls
super().__init__(boxes, orig_shape)
self.is_track = n == 8
self.orig_shape = orig_shape

View file

@ -23,14 +23,31 @@ from torch import nn, optim
from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.data.utils import check_cls_dataset, check_det_dataset
from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
from ultralytics.utils import (DEFAULT_CFG, LOGGER, RANK, TQDM, __version__, callbacks, clean_url, colorstr, emojis,
yaml_save)
from ultralytics.utils import (
DEFAULT_CFG,
LOGGER,
RANK,
TQDM,
__version__,
callbacks,
clean_url,
colorstr,
emojis,
yaml_save,
)
from ultralytics.utils.autobatch import check_train_batch_size
from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
from ultralytics.utils.files import get_latest_run
from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, init_seeds, one_cycle, select_device,
strip_optimizer)
from ultralytics.utils.torch_utils import (
EarlyStopping,
ModelEMA,
de_parallel,
init_seeds,
one_cycle,
select_device,
strip_optimizer,
)
class BaseTrainer:
@ -89,12 +106,12 @@ class BaseTrainer:
# Dirs
self.save_dir = get_save_dir(self.args)
self.args.name = self.save_dir.name # update name for loggers
self.wdir = self.save_dir / 'weights' # weights dir
self.wdir = self.save_dir / "weights" # weights dir
if RANK in (-1, 0):
self.wdir.mkdir(parents=True, exist_ok=True) # make dir
self.args.save_dir = str(self.save_dir)
yaml_save(self.save_dir / 'args.yaml', vars(self.args)) # save run args
self.last, self.best = self.wdir / 'last.pt', self.wdir / 'best.pt' # checkpoint paths
yaml_save(self.save_dir / "args.yaml", vars(self.args)) # save run args
self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt" # checkpoint paths
self.save_period = self.args.save_period
self.batch_size = self.args.batch
@ -104,18 +121,18 @@ class BaseTrainer:
print_args(vars(self.args))
# Device
if self.device.type in ('cpu', 'mps'):
if self.device.type in ("cpu", "mps"):
self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading
# Model and Dataset
self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolov8n -> yolov8n.pt
try:
if self.args.task == 'classify':
if self.args.task == "classify":
self.data = check_cls_dataset(self.args.data)
elif self.args.data.split('.')[-1] in ('yaml', 'yml') or self.args.task in ('detect', 'segment', 'pose'):
elif self.args.data.split(".")[-1] in ("yaml", "yml") or self.args.task in ("detect", "segment", "pose"):
self.data = check_det_dataset(self.args.data)
if 'yaml_file' in self.data:
self.args.data = self.data['yaml_file'] # for validating 'yolo train data=url.zip' usage
if "yaml_file" in self.data:
self.args.data = self.data["yaml_file"] # for validating 'yolo train data=url.zip' usage
except Exception as e:
raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e
@ -131,8 +148,8 @@ class BaseTrainer:
self.fitness = None
self.loss = None
self.tloss = None
self.loss_names = ['Loss']
self.csv = self.save_dir / 'results.csv'
self.loss_names = ["Loss"]
self.csv = self.save_dir / "results.csv"
self.plot_idx = [0, 1, 2]
# Callbacks
@ -156,7 +173,7 @@ class BaseTrainer:
def train(self):
"""Allow device='', device=None on Multi-GPU systems to default to device=0."""
if isinstance(self.args.device, str) and len(self.args.device): # i.e. device='0' or device='0,1,2,3'
world_size = len(self.args.device.split(','))
world_size = len(self.args.device.split(","))
elif isinstance(self.args.device, (tuple, list)): # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
world_size = len(self.args.device)
elif torch.cuda.is_available(): # i.e. device=None or device='' or device=number
@ -165,14 +182,16 @@ class BaseTrainer:
world_size = 0
# Run subprocess if DDP training, else train normally
if world_size > 1 and 'LOCAL_RANK' not in os.environ:
if world_size > 1 and "LOCAL_RANK" not in os.environ:
# Argument checks
if self.args.rect:
LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
self.args.rect = False
if self.args.batch == -1:
LOGGER.warning("WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
"default 'batch=16'")
LOGGER.warning(
"WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
"default 'batch=16'"
)
self.args.batch = 16
# Command
@ -199,37 +218,45 @@ class BaseTrainer:
def _setup_ddp(self, world_size):
"""Initializes and sets the DistributedDataParallel parameters for training."""
torch.cuda.set_device(RANK)
self.device = torch.device('cuda', RANK)
self.device = torch.device("cuda", RANK)
# LOGGER.info(f'DDP info: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
os.environ['NCCL_BLOCKING_WAIT'] = '1' # set to enforce timeout
os.environ["NCCL_BLOCKING_WAIT"] = "1" # set to enforce timeout
dist.init_process_group(
'nccl' if dist.is_nccl_available() else 'gloo',
"nccl" if dist.is_nccl_available() else "gloo",
timeout=timedelta(seconds=10800), # 3 hours
rank=RANK,
world_size=world_size)
world_size=world_size,
)
def _setup_train(self, world_size):
"""Builds dataloaders and optimizer on correct rank process."""
# Model
self.run_callbacks('on_pretrain_routine_start')
self.run_callbacks("on_pretrain_routine_start")
ckpt = self.setup_model()
self.model = self.model.to(self.device)
self.set_model_attributes()
# Freeze layers
freeze_list = self.args.freeze if isinstance(
self.args.freeze, list) else range(self.args.freeze) if isinstance(self.args.freeze, int) else []
always_freeze_names = ['.dfl'] # always freeze these layers
freeze_layer_names = [f'model.{x}.' for x in freeze_list] + always_freeze_names
freeze_list = (
self.args.freeze
if isinstance(self.args.freeze, list)
else range(self.args.freeze)
if isinstance(self.args.freeze, int)
else []
)
always_freeze_names = [".dfl"] # always freeze these layers
freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
for k, v in self.model.named_parameters():
# v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
if any(x in k for x in freeze_layer_names):
LOGGER.info(f"Freezing layer '{k}'")
v.requires_grad = False
elif not v.requires_grad:
LOGGER.info(f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
'See ultralytics.engine.trainer for customization of frozen layers.')
LOGGER.info(
f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
"See ultralytics.engine.trainer for customization of frozen layers."
)
v.requires_grad = True
# Check AMP
@ -246,7 +273,7 @@ class BaseTrainer:
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK])
# Check imgsz
gs = max(int(self.model.stride.max() if hasattr(self.model, 'stride') else 32), 32) # grid size (max stride)
gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32) # grid size (max stride)
self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
self.stride = gs # for multi-scale training
@ -256,15 +283,14 @@ class BaseTrainer:
# Dataloaders
batch_size = self.batch_size // max(world_size, 1)
self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode='train')
self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode="train")
if RANK in (-1, 0):
# NOTE: When training on the DOTA dataset, doubling the batch size could cause OOM because some images have more than 2000 objects.
self.test_loader = self.get_dataloader(self.testset,
batch_size=batch_size if self.args.task == 'obb' else batch_size * 2,
rank=-1,
mode='val')
self.test_loader = self.get_dataloader(
self.testset, batch_size=batch_size if self.args.task == "obb" else batch_size * 2, rank=-1, mode="val"
)
self.validator = self.get_validator()
metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
self.ema = ModelEMA(self.model)
if self.args.plots:
@ -274,18 +300,20 @@ class BaseTrainer:
self.accumulate = max(round(self.args.nbs / self.batch_size), 1) # accumulate loss before optimizing
weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs # scale weight_decay
iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
self.optimizer = self.build_optimizer(model=self.model,
name=self.args.optimizer,
lr=self.args.lr0,
momentum=self.args.momentum,
decay=weight_decay,
iterations=iterations)
self.optimizer = self.build_optimizer(
model=self.model,
name=self.args.optimizer,
lr=self.args.lr0,
momentum=self.args.momentum,
decay=weight_decay,
iterations=iterations,
)
# Scheduler
self._setup_scheduler()
self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
self.resume_training(ckpt)
self.scheduler.last_epoch = self.start_epoch - 1 # do not move
self.run_callbacks('on_pretrain_routine_end')
self.run_callbacks("on_pretrain_routine_end")
def _do_train(self, world_size=1):
"""Train completed, evaluate and plot if specified by arguments."""
@ -299,19 +327,23 @@ class BaseTrainer:
self.epoch_time = None
self.epoch_time_start = time.time()
self.train_time_start = time.time()
self.run_callbacks('on_train_start')
LOGGER.info(f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
f"Logging results to {colorstr('bold', self.save_dir)}\n"
f'Starting training for '
f'{self.args.time} hours...' if self.args.time else f'{self.epochs} epochs...')
self.run_callbacks("on_train_start")
LOGGER.info(
f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
f"Logging results to {colorstr('bold', self.save_dir)}\n"
f'Starting training for '
f'{self.args.time} hours...'
if self.args.time
else f"{self.epochs} epochs..."
)
if self.args.close_mosaic:
base_idx = (self.epochs - self.args.close_mosaic) * nb
self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
epoch = self.epochs # predefine for resume fully trained model edge cases
for epoch in range(self.start_epoch, self.epochs):
self.epoch = epoch
self.run_callbacks('on_train_epoch_start')
self.run_callbacks("on_train_epoch_start")
self.model.train()
if RANK != -1:
self.train_loader.sampler.set_epoch(epoch)
@ -327,7 +359,7 @@ class BaseTrainer:
self.tloss = None
self.optimizer.zero_grad()
for i, batch in pbar:
self.run_callbacks('on_train_batch_start')
self.run_callbacks("on_train_batch_start")
# Warmup
ni = i + nb * epoch
if ni <= nw:
@ -335,10 +367,11 @@ class BaseTrainer:
self.accumulate = max(1, int(np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()))
for j, x in enumerate(self.optimizer.param_groups):
# Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(
ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])
x["lr"] = np.interp(
ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x["initial_lr"] * self.lf(epoch)]
)
if "momentum" in x:
x["momentum"] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])
# Forward
with torch.cuda.amp.autocast(self.amp):
@ -346,8 +379,9 @@ class BaseTrainer:
self.loss, self.loss_items = self.model(batch)
if RANK != -1:
self.loss *= world_size
self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \
else self.loss_items
self.tloss = (
(self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items
)
# Backward
self.scaler.scale(self.loss).backward()
@ -368,24 +402,25 @@ class BaseTrainer:
break
# Log
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G" # (GB)
loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1
losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
if RANK in (-1, 0):
pbar.set_description(
('%11s' * 2 + '%11.4g' * (2 + loss_len)) %
(f'{epoch + 1}/{self.epochs}', mem, *losses, batch['cls'].shape[0], batch['img'].shape[-1]))
self.run_callbacks('on_batch_end')
("%11s" * 2 + "%11.4g" * (2 + loss_len))
% (f"{epoch + 1}/{self.epochs}", mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1])
)
self.run_callbacks("on_batch_end")
if self.args.plots and ni in self.plot_idx:
self.plot_training_samples(batch, ni)
self.run_callbacks('on_train_batch_end')
self.run_callbacks("on_train_batch_end")
self.lr = {f'lr/pg{ir}': x['lr'] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
self.run_callbacks('on_train_epoch_end')
self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
self.run_callbacks("on_train_epoch_end")
if RANK in (-1, 0):
final_epoch = epoch + 1 == self.epochs
self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])
# Validation
if self.args.val or final_epoch or self.stopper.possible_stop or self.stop:
@ -398,14 +433,14 @@ class BaseTrainer:
# Save model
if self.args.save or final_epoch:
self.save_model()
self.run_callbacks('on_model_save')
self.run_callbacks("on_model_save")
# Scheduler
t = time.time()
self.epoch_time = t - self.epoch_time_start
self.epoch_time_start = t
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress 'Detected lr_scheduler.step() before optimizer.step()'
warnings.simplefilter("ignore") # suppress 'Detected lr_scheduler.step() before optimizer.step()'
if self.args.time:
mean_epoch_time = (t - self.train_time_start) / (epoch - self.start_epoch + 1)
self.epochs = self.args.epochs = math.ceil(self.args.time * 3600 / mean_epoch_time)
@ -413,7 +448,7 @@ class BaseTrainer:
self.scheduler.last_epoch = self.epoch # do not move
self.stop |= epoch >= self.epochs # stop if exceeded epochs
self.scheduler.step()
self.run_callbacks('on_fit_epoch_end')
self.run_callbacks("on_fit_epoch_end")
torch.cuda.empty_cache() # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors
# Early Stopping
@ -426,39 +461,43 @@ class BaseTrainer:
if RANK in (-1, 0):
# Do final val with best.pt
LOGGER.info(f'\n{epoch - self.start_epoch + 1} epochs completed in '
f'{(time.time() - self.train_time_start) / 3600:.3f} hours.')
LOGGER.info(
f"\n{epoch - self.start_epoch + 1} epochs completed in "
f"{(time.time() - self.train_time_start) / 3600:.3f} hours."
)
self.final_eval()
if self.args.plots:
self.plot_metrics()
self.run_callbacks('on_train_end')
self.run_callbacks("on_train_end")
torch.cuda.empty_cache()
self.run_callbacks('teardown')
self.run_callbacks("teardown")
def save_model(self):
"""Save model training checkpoints with additional metadata."""
import pandas as pd # scope for faster startup
metrics = {**self.metrics, **{'fitness': self.fitness}}
results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient='list').items()}
metrics = {**self.metrics, **{"fitness": self.fitness}}
results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
ckpt = {
'epoch': self.epoch,
'best_fitness': self.best_fitness,
'model': deepcopy(de_parallel(self.model)).half(),
'ema': deepcopy(self.ema.ema).half(),
'updates': self.ema.updates,
'optimizer': self.optimizer.state_dict(),
'train_args': vars(self.args), # save as dict
'train_metrics': metrics,
'train_results': results,
'date': datetime.now().isoformat(),
'version': __version__}
"epoch": self.epoch,
"best_fitness": self.best_fitness,
"model": deepcopy(de_parallel(self.model)).half(),
"ema": deepcopy(self.ema.ema).half(),
"updates": self.ema.updates,
"optimizer": self.optimizer.state_dict(),
"train_args": vars(self.args), # save as dict
"train_metrics": metrics,
"train_results": results,
"date": datetime.now().isoformat(),
"version": __version__,
}
# Save last and best
torch.save(ckpt, self.last)
if self.best_fitness == self.fitness:
torch.save(ckpt, self.best)
if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt')
torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
@staticmethod
def get_dataset(data):
@ -467,7 +506,7 @@ class BaseTrainer:
Returns None if data format is not recognized.
"""
return data['train'], data.get('val') or data.get('test')
return data["train"], data.get("val") or data.get("test")
def setup_model(self):
"""Load/create/download model for any task."""
@ -476,9 +515,9 @@ class BaseTrainer:
model, weights = self.model, None
ckpt = None
if str(model).endswith('.pt'):
if str(model).endswith(".pt"):
weights, ckpt = attempt_load_one_weight(model)
cfg = ckpt['model'].yaml
cfg = ckpt["model"].yaml
else:
cfg = model
self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1) # calls Model(cfg, weights)
@ -505,7 +544,7 @@ class BaseTrainer:
The returned dict is expected to contain "fitness" key.
"""
metrics = self.validator(self)
fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
if not self.best_fitness or self.best_fitness < fitness:
self.best_fitness = fitness
return metrics, fitness
@ -516,24 +555,24 @@ class BaseTrainer:
def get_validator(self):
"""Returns a NotImplementedError when the get_validator function is called."""
raise NotImplementedError('get_validator function not implemented in trainer')
raise NotImplementedError("get_validator function not implemented in trainer")
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
"""Returns dataloader derived from torch.data.Dataloader."""
raise NotImplementedError('get_dataloader function not implemented in trainer')
raise NotImplementedError("get_dataloader function not implemented in trainer")
def build_dataset(self, img_path, mode='train', batch=None):
def build_dataset(self, img_path, mode="train", batch=None):
"""Build dataset."""
raise NotImplementedError('build_dataset function not implemented in trainer')
raise NotImplementedError("build_dataset function not implemented in trainer")
def label_loss_items(self, loss_items=None, prefix='train'):
def label_loss_items(self, loss_items=None, prefix="train"):
"""Returns a loss dict with labelled training loss items tensor."""
# Not needed for classification but necessary for segmentation & detection
return {'loss': loss_items} if loss_items is not None else ['loss']
return {"loss": loss_items} if loss_items is not None else ["loss"]
def set_model_attributes(self):
"""To set or update model parameters before training."""
self.model.names = self.data['names']
self.model.names = self.data["names"]
def build_targets(self, preds, targets):
"""Builds target tensors for training YOLO model."""
@ -541,7 +580,7 @@ class BaseTrainer:
def progress_string(self):
"""Returns a string describing training progress."""
return ''
return ""
# TODO: may need to move the following functions into callbacks
def plot_training_samples(self, batch, ni):
@ -556,9 +595,9 @@ class BaseTrainer:
"""Saves training metrics to a CSV file."""
keys, vals = list(metrics.keys()), list(metrics.values())
n = len(metrics) + 1 # number of cols
s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header
with open(self.csv, 'a') as f:
f.write(s + ('%23.5g,' * n % tuple([self.epoch + 1] + vals)).rstrip(',') + '\n')
s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n") # header
with open(self.csv, "a") as f:
f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n")
def plot_metrics(self):
"""Plot and display metrics visually."""
@ -567,7 +606,7 @@ class BaseTrainer:
def on_plot(self, name, data=None):
"""Registers plots (e.g. to be consumed in callbacks)"""
path = Path(name)
self.plots[path] = {'data': data, 'timestamp': time.time()}
self.plots[path] = {"data": data, "timestamp": time.time()}
def final_eval(self):
"""Performs final evaluation and validation for object detection YOLO model."""
@ -575,11 +614,11 @@ class BaseTrainer:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is self.best:
LOGGER.info(f'\nValidating {f}...')
LOGGER.info(f"\nValidating {f}...")
self.validator.args.plots = self.args.plots
self.metrics = self.validator(model=f)
self.metrics.pop('fitness', None)
self.run_callbacks('on_fit_epoch_end')
self.metrics.pop("fitness", None)
self.run_callbacks("on_fit_epoch_end")
def check_resume(self, overrides):
"""Check if resume checkpoint exists and update arguments accordingly."""
@ -591,19 +630,21 @@ class BaseTrainer:
# Check that resume data YAML exists, otherwise strip to force re-download of dataset
ckpt_args = attempt_load_weights(last).args
if not Path(ckpt_args['data']).exists():
ckpt_args['data'] = self.args.data
if not Path(ckpt_args["data"]).exists():
ckpt_args["data"] = self.args.data
resume = True
self.args = get_cfg(ckpt_args)
self.args.model = str(last) # reinstate model
for k in 'imgsz', 'batch': # allow arg updates to reduce memory on resume if crashed due to CUDA OOM
for k in "imgsz", "batch": # allow arg updates to reduce memory on resume if crashed due to CUDA OOM
if k in overrides:
setattr(self.args, k, overrides[k])
except Exception as e:
raise FileNotFoundError('Resume checkpoint not found. Please pass a valid checkpoint to resume from, '
"i.e. 'yolo train resume model=path/to/last.pt'") from e
raise FileNotFoundError(
"Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
"i.e. 'yolo train resume model=path/to/last.pt'"
) from e
self.resume = resume
def resume_training(self, ckpt):
@ -611,23 +652,26 @@ class BaseTrainer:
if ckpt is None:
return
best_fitness = 0.0
start_epoch = ckpt['epoch'] + 1
if ckpt['optimizer'] is not None:
self.optimizer.load_state_dict(ckpt['optimizer']) # optimizer
best_fitness = ckpt['best_fitness']
if self.ema and ckpt.get('ema'):
self.ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) # EMA
self.ema.updates = ckpt['updates']
start_epoch = ckpt["epoch"] + 1
if ckpt["optimizer"] is not None:
self.optimizer.load_state_dict(ckpt["optimizer"]) # optimizer
best_fitness = ckpt["best_fitness"]
if self.ema and ckpt.get("ema"):
self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict()) # EMA
self.ema.updates = ckpt["updates"]
if self.resume:
assert start_epoch > 0, \
f'{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n' \
assert start_epoch > 0, (
f"{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n"
f"Start a new training without resuming, i.e. 'yolo train model={self.args.model}'"
)
LOGGER.info(
f'Resuming training from {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs')
f"Resuming training from {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs"
)
if self.epochs < start_epoch:
LOGGER.info(
f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs.")
self.epochs += ckpt['epoch'] # finetune additional epochs
f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs."
)
self.epochs += ckpt["epoch"] # finetune additional epochs
self.best_fitness = best_fitness
self.start_epoch = start_epoch
if start_epoch > (self.epochs - self.args.close_mosaic):
@ -635,13 +679,13 @@ class BaseTrainer:
def _close_dataloader_mosaic(self):
"""Update dataloaders to stop using mosaic augmentation."""
if hasattr(self.train_loader.dataset, 'mosaic'):
if hasattr(self.train_loader.dataset, "mosaic"):
self.train_loader.dataset.mosaic = False
if hasattr(self.train_loader.dataset, 'close_mosaic'):
LOGGER.info('Closing dataloader mosaic')
if hasattr(self.train_loader.dataset, "close_mosaic"):
LOGGER.info("Closing dataloader mosaic")
self.train_loader.dataset.close_mosaic(hyp=self.args)
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
"""
Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
weight decay, and number of iterations.
@ -661,41 +705,45 @@ class BaseTrainer:
"""
g = [], [], [] # optimizer parameter groups
bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d()
if name == 'auto':
LOGGER.info(f"{colorstr('optimizer:')} 'optimizer=auto' found, "
f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
f"determining best 'optimizer', 'lr0' and 'momentum' automatically... ")
nc = getattr(model, 'nc', 10) # number of classes
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
if name == "auto":
LOGGER.info(
f"{colorstr('optimizer:')} 'optimizer=auto' found, "
f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
)
nc = getattr(model, "nc", 10) # number of classes
lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
name, lr, momentum = ('SGD', 0.01, 0.9) if iterations > 10000 else ('AdamW', lr_fit, 0.9)
name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
for module_name, module in model.named_modules():
for param_name, param in module.named_parameters(recurse=False):
fullname = f'{module_name}.{param_name}' if module_name else param_name
if 'bias' in fullname: # bias (no decay)
fullname = f"{module_name}.{param_name}" if module_name else param_name
if "bias" in fullname: # bias (no decay)
g[2].append(param)
elif isinstance(module, bn): # weight (no decay)
g[1].append(param)
else: # weight (with decay)
g[0].append(param)
if name in ('Adam', 'Adamax', 'AdamW', 'NAdam', 'RAdam'):
if name in ("Adam", "Adamax", "AdamW", "NAdam", "RAdam"):
optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
elif name == 'RMSProp':
elif name == "RMSProp":
optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
elif name == 'SGD':
elif name == "SGD":
optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
else:
raise NotImplementedError(
f"Optimizer '{name}' not found in list of available optimizers "
f'[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto].'
'To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics.')
f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]."
"To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics."
)
optimizer.add_param_group({'params': g[0], 'weight_decay': decay}) # add g0 with weight_decay
optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights)
optimizer.add_param_group({"params": g[0], "weight_decay": decay}) # add g0 with weight_decay
optimizer.add_param_group({"params": g[1], "weight_decay": 0.0}) # add g1 (BatchNorm2d weights)
LOGGER.info(
f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)')
f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)'
)
return optimizer

View file

@ -73,40 +73,43 @@ class Tuner:
Args:
args (dict, optional): Configuration for hyperparameter evolution.
"""
self.space = args.pop('space', None) or { # key: (min, max, gain(optional))
self.space = args.pop("space", None) or { # key: (min, max, gain(optional))
# 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
'lr0': (1e-5, 1e-1), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
'lrf': (0.0001, 0.1), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.7, 0.98, 0.3), # SGD momentum/Adam beta1
'weight_decay': (0.0, 0.001), # optimizer weight decay 5e-4
'warmup_epochs': (0.0, 5.0), # warmup epochs (fractions ok)
'warmup_momentum': (0.0, 0.95), # warmup initial momentum
'box': (1.0, 20.0), # box loss gain
'cls': (0.2, 4.0), # cls loss gain (scale with pixels)
'dfl': (0.4, 6.0), # dfl loss gain
'hsv_h': (0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (0.0, 45.0), # image rotation (+/- deg)
'translate': (0.0, 0.9), # image translation (+/- fraction)
'scale': (0.0, 0.95), # image scale (+/- gain)
'shear': (0.0, 10.0), # image shear (+/- deg)
'perspective': (0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (0.0, 1.0), # image flip up-down (probability)
'fliplr': (0.0, 1.0), # image flip left-right (probability)
'mosaic': (0.0, 1.0), # image mosaic (probability)
'mixup': (0.0, 1.0), # image mixup (probability)
'copy_paste': (0.0, 1.0)} # segment copy-paste (probability)
"lr0": (1e-5, 1e-1), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
"lrf": (0.0001, 0.1), # final OneCycleLR learning rate (lr0 * lrf)
"momentum": (0.7, 0.98, 0.3), # SGD momentum/Adam beta1
"weight_decay": (0.0, 0.001), # optimizer weight decay 5e-4
"warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
"warmup_momentum": (0.0, 0.95), # warmup initial momentum
"box": (1.0, 20.0), # box loss gain
"cls": (0.2, 4.0), # cls loss gain (scale with pixels)
"dfl": (0.4, 6.0), # dfl loss gain
"hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
"hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
"hsv_v": (0.0, 0.9), # image HSV-Value augmentation (fraction)
"degrees": (0.0, 45.0), # image rotation (+/- deg)
"translate": (0.0, 0.9), # image translation (+/- fraction)
"scale": (0.0, 0.95), # image scale (+/- gain)
"shear": (0.0, 10.0), # image shear (+/- deg)
"perspective": (0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
"flipud": (0.0, 1.0), # image flip up-down (probability)
"fliplr": (0.0, 1.0), # image flip left-right (probability)
"mosaic": (0.0, 1.0), # image mixup (probability)
"mixup": (0.0, 1.0), # image mixup (probability)
"copy_paste": (0.0, 1.0), # segment copy-paste (probability)
}
self.args = get_cfg(overrides=args)
self.tune_dir = get_save_dir(self.args, name='tune')
self.tune_csv = self.tune_dir / 'tune_results.csv'
self.tune_dir = get_save_dir(self.args, name="tune")
self.tune_csv = self.tune_dir / "tune_results.csv"
self.callbacks = _callbacks or callbacks.get_default_callbacks()
self.prefix = colorstr('Tuner: ')
self.prefix = colorstr("Tuner: ")
callbacks.add_integration_callbacks(self)
LOGGER.info(f"{self.prefix}Initialized Tuner instance with 'tune_dir={self.tune_dir}'\n"
f'{self.prefix}💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning')
LOGGER.info(
f"{self.prefix}Initialized Tuner instance with 'tune_dir={self.tune_dir}'\n"
f"{self.prefix}💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning"
)
def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2):
def _mutate(self, parent="single", n=5, mutation=0.8, sigma=0.2):
"""
Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`.
@ -121,15 +124,15 @@ class Tuner:
"""
if self.tune_csv.exists(): # if CSV file exists: select best hyps and mutate
# Select parent(s)
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=',', skiprows=1)
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
fitness = x[:, 0] # first column
n = min(n, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness)][:n] # top n mutations
w = x[:, 0] - x[:, 0].min() + 1E-6 # weights (sum > 0)
if parent == 'single' or len(x) == 1:
w = x[:, 0] - x[:, 0].min() + 1e-6 # weights (sum > 0)
if parent == "single" or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
elif parent == "weighted":
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
@ -174,44 +177,44 @@ class Tuner:
t0 = time.time()
best_save_dir, best_metrics = None, None
(self.tune_dir / 'weights').mkdir(parents=True, exist_ok=True)
(self.tune_dir / "weights").mkdir(parents=True, exist_ok=True)
for i in range(iterations):
# Mutate hyperparameters
mutated_hyp = self._mutate()
LOGGER.info(f'{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}')
LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
metrics = {}
train_args = {**vars(self.args), **mutated_hyp}
save_dir = get_save_dir(get_cfg(train_args))
weights_dir = save_dir / 'weights'
ckpt_file = weights_dir / ('best.pt' if (weights_dir / 'best.pt').exists() else 'last.pt')
weights_dir = save_dir / "weights"
ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
try:
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
cmd = ['yolo', 'train', *(f'{k}={v}' for k, v in train_args.items())]
cmd = ["yolo", "train", *(f"{k}={v}" for k, v in train_args.items())]
return_code = subprocess.run(cmd, check=True).returncode
metrics = torch.load(ckpt_file)['train_metrics']
assert return_code == 0, 'training failed'
metrics = torch.load(ckpt_file)["train_metrics"]
assert return_code == 0, "training failed"
except Exception as e:
LOGGER.warning(f'WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}')
LOGGER.warning(f"WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}")
# Save results and mutated_hyp to CSV
fitness = metrics.get('fitness', 0.0)
fitness = metrics.get("fitness", 0.0)
log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
headers = '' if self.tune_csv.exists() else (','.join(['fitness'] + list(self.space.keys())) + '\n')
with open(self.tune_csv, 'a') as f:
f.write(headers + ','.join(map(str, log_row)) + '\n')
headers = "" if self.tune_csv.exists() else (",".join(["fitness"] + list(self.space.keys())) + "\n")
with open(self.tune_csv, "a") as f:
f.write(headers + ",".join(map(str, log_row)) + "\n")
# Get best results
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=',', skiprows=1)
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
fitness = x[:, 0] # first column
best_idx = fitness.argmax()
best_is_current = best_idx == i
if best_is_current:
best_save_dir = save_dir
best_metrics = {k: round(v, 5) for k, v in metrics.items()}
for ckpt in weights_dir.glob('*.pt'):
shutil.copy2(ckpt, self.tune_dir / 'weights')
for ckpt in weights_dir.glob("*.pt"):
shutil.copy2(ckpt, self.tune_dir / "weights")
elif cleanup:
shutil.rmtree(ckpt_file.parent) # remove iteration weights/ dir to reduce storage space
@ -219,15 +222,19 @@ class Tuner:
plot_tune_results(self.tune_csv)
# Save and print tune results
header = (f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
f'{self.prefix}Best fitness metrics are {best_metrics}\n'
f'{self.prefix}Best fitness model is {best_save_dir}\n'
f'{self.prefix}Best fitness hyperparameters are printed below.\n')
LOGGER.info('\n' + header)
header = (
f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
f'{self.prefix}Best fitness metrics are {best_metrics}\n'
f'{self.prefix}Best fitness model is {best_save_dir}\n'
f'{self.prefix}Best fitness hyperparameters are printed below.\n'
)
LOGGER.info("\n" + header)
data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
yaml_save(self.tune_dir / 'best_hyperparameters.yaml',
data=data,
header=remove_colorstr(header.replace(self.prefix, '# ')) + '\n')
yaml_print(self.tune_dir / 'best_hyperparameters.yaml')
yaml_save(
self.tune_dir / "best_hyperparameters.yaml",
data=data,
header=remove_colorstr(header.replace(self.prefix, "# ")) + "\n",
)
yaml_print(self.tune_dir / "best_hyperparameters.yaml")

View file

@ -89,10 +89,10 @@ class BaseValidator:
self.nc = None
self.iouv = None
self.jdict = None
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
self.save_dir = save_dir or get_save_dir(self.args)
(self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
(self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
if self.args.conf is None:
self.args.conf = 0.001 # default conf=0.001
self.args.imgsz = check_imgsz(self.args.imgsz, max_dim=1)
@ -110,7 +110,7 @@ class BaseValidator:
if self.training:
self.device = trainer.device
self.data = trainer.data
self.args.half = self.device.type != 'cpu' # force FP16 val during training
self.args.half = self.device.type != "cpu" # force FP16 val during training
model = trainer.ema.ema or trainer.model
model = model.half() if self.args.half else model.float()
# self.model = model
@ -119,11 +119,13 @@ class BaseValidator:
model.eval()
else:
callbacks.add_integration_callbacks(self)
model = AutoBackend(model or self.args.model,
device=select_device(self.args.device, self.args.batch),
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half)
model = AutoBackend(
model or self.args.model,
device=select_device(self.args.device, self.args.batch),
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half,
)
# self.model = model
self.device = model.device # update device
self.args.half = model.fp16 # update half
@ -133,16 +135,16 @@ class BaseValidator:
self.args.batch = model.batch_size
elif not pt and not jit:
self.args.batch = 1 # export.py models default to batch-size 1
LOGGER.info(f'Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
LOGGER.info(f"Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
if str(self.args.data).split('.')[-1] in ('yaml', 'yml'):
if str(self.args.data).split(".")[-1] in ("yaml", "yml"):
self.data = check_det_dataset(self.args.data)
elif self.args.task == 'classify':
elif self.args.task == "classify":
self.data = check_cls_dataset(self.args.data, split=self.args.split)
else:
raise FileNotFoundError(emojis(f"Dataset '{self.args.data}' for task={self.args.task} not found ❌"))
if self.device.type in ('cpu', 'mps'):
if self.device.type in ("cpu", "mps"):
self.args.workers = 0 # faster CPU val as time dominated by inference, not dataloading
if not pt:
self.args.rect = False
@ -152,13 +154,13 @@ class BaseValidator:
model.eval()
model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz)) # warmup
self.run_callbacks('on_val_start')
self.run_callbacks("on_val_start")
dt = Profile(), Profile(), Profile(), Profile()
bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
self.init_metrics(de_parallel(model))
self.jdict = [] # empty before each val
for batch_i, batch in enumerate(bar):
self.run_callbacks('on_val_batch_start')
self.run_callbacks("on_val_batch_start")
self.batch_i = batch_i
# Preprocess
with dt[0]:
@ -166,7 +168,7 @@ class BaseValidator:
# Inference
with dt[1]:
preds = model(batch['img'], augment=augment)
preds = model(batch["img"], augment=augment)
# Loss
with dt[2]:
@ -182,23 +184,25 @@ class BaseValidator:
self.plot_val_samples(batch, batch_i)
self.plot_predictions(batch, preds, batch_i)
self.run_callbacks('on_val_batch_end')
self.run_callbacks("on_val_batch_end")
stats = self.get_stats()
self.check_stats(stats)
self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1E3 for x in dt)))
self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
self.finalize_metrics()
self.print_results()
self.run_callbacks('on_val_end')
self.run_callbacks("on_val_end")
if self.training:
model.float()
results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix='val')}
results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")}
return {k: round(float(v), 5) for k, v in results.items()} # return results as 5 decimal place floats
else:
LOGGER.info('Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image' %
tuple(self.speed.values()))
LOGGER.info(
"Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image"
% tuple(self.speed.values())
)
if self.args.save_json and self.jdict:
with open(str(self.save_dir / 'predictions.json'), 'w') as f:
LOGGER.info(f'Saving {f.name}...')
with open(str(self.save_dir / "predictions.json"), "w") as f:
LOGGER.info(f"Saving {f.name}...")
json.dump(self.jdict, f) # flatten and save
stats = self.eval_json(stats) # update stats
if self.args.plots or self.args.save_json:
@ -228,6 +232,7 @@ class BaseValidator:
if use_scipy:
# WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708
import scipy # scope import to avoid importing for all commands
cost_matrix = iou * (iou >= threshold)
if cost_matrix.any():
labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True)
@ -257,11 +262,11 @@ class BaseValidator:
def get_dataloader(self, dataset_path, batch_size):
"""
Get data loader from dataset path and batch size.

Placeholder on the base validator: the body never reads its arguments and unconditionally raises, so a concrete
validator presumably has to override this method (confirm against the subclasses).

Args:
dataset_path: Not used here; presumably the dataset location handed to an overriding implementation.
batch_size: Not used here; presumably the batch size handed to an overriding implementation.

Raises:
NotImplementedError: Always.
"""
# NOTE(review): the two raise lines below are the pre- and post-change renderings of a single statement in this
# diff view; they differ only in quote style.
raise NotImplementedError('get_dataloader function not implemented for this validator')
raise NotImplementedError("get_dataloader function not implemented for this validator")
def build_dataset(self, img_path):
"""
Build dataset.

Placeholder on the base validator: the body never reads `img_path` and unconditionally raises, so a concrete
validator presumably has to override this method (confirm against the subclasses).

Args:
img_path: Not used here; presumably the image location handed to an overriding implementation.

Raises:
NotImplementedError: Always.
"""
# NOTE(review): the two raise lines below are the pre- and post-change renderings of a single statement in this
# diff view; they differ only in quote style.
raise NotImplementedError('build_dataset function not implemented in validator')
raise NotImplementedError("build_dataset function not implemented in validator")
def preprocess(self, batch):
"""Preprocesses an input batch."""
@ -306,7 +311,7 @@ class BaseValidator:
def on_plot(self, name, data=None):
"""
Registers plots (e.g. to be consumed in callbacks).

Stores an entry in `self.plots` keyed by `Path(name)`; registering the same name again replaces the earlier entry.

Args:
name: Plot identifier; wrapped in `Path(name)` to form the dictionary key.
data: Optional payload stored under the "data" key. Defaults to None.
"""
# Each entry records the payload together with the registration time taken from time.time().
# NOTE(review): the two assignments below are the pre- and post-change renderings of a single statement in this
# diff view; they differ only in quote style.
self.plots[Path(name)] = {'data': data, 'timestamp': time.time()}
self.plots[Path(name)] = {"data": data, "timestamp": time.time()}
# TODO: may need to put these following functions into callback
def plot_val_samples(self, batch, ni):