ultralytics 8.0.206 engine Trainer updates (#6111)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jamjamjon <51357717+jamjamjon@users.noreply.github.com>
This commit is contained in:
parent
25bd3b9834
commit
f2f5ed2c5e
7 changed files with 42 additions and 34 deletions
|
|
@ -1,6 +1,6 @@
|
|||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
|
||||
__version__ = '8.0.205'
|
||||
__version__ = '8.0.206'
|
||||
|
||||
from ultralytics.models import RTDETR, SAM, YOLO
|
||||
from ultralytics.models.fastsam import FastSAM
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
|
|||
from ultralytics.hub.utils import HUB_WEB_ROOT
|
||||
from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load
|
||||
from ultralytics.utils import ASSETS, DEFAULT_CFG_DICT, LOGGER, RANK, callbacks, checks, emojis, yaml_load
|
||||
from ultralytics.utils.downloads import GITHUB_ASSETS_STEMS
|
||||
|
||||
|
||||
class Model(nn.Module):
|
||||
|
|
@ -88,10 +87,8 @@ class Model(nn.Module):
|
|||
return
|
||||
|
||||
# Load or create new YOLO model
|
||||
suffix = Path(model).suffix
|
||||
if not suffix and Path(model).stem in GITHUB_ASSETS_STEMS:
|
||||
model, suffix = Path(model).with_suffix('.pt'), '.pt' # add suffix, i.e. yolov8n -> yolov8n.pt
|
||||
if suffix in ('.yaml', '.yml'):
|
||||
model = checks.check_model_file_from_stem(model) # add suffix, i.e. yolov8n -> yolov8n.pt
|
||||
if Path(model).suffix in ('.yaml', '.yml'):
|
||||
self._new(model, task)
|
||||
else:
|
||||
self._load(model, task)
|
||||
|
|
|
|||
|
|
@ -19,8 +19,6 @@ import numpy as np
|
|||
import torch
|
||||
from torch import distributed as dist
|
||||
from torch import nn, optim
|
||||
from torch.cuda import amp
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
|
||||
from ultralytics.cfg import get_cfg, get_save_dir
|
||||
from ultralytics.data.utils import check_cls_dataset, check_det_dataset
|
||||
|
|
@ -28,7 +26,7 @@ from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
|
|||
from ultralytics.utils import (DEFAULT_CFG, LOGGER, RANK, TQDM, __version__, callbacks, clean_url, colorstr, emojis,
|
||||
yaml_save)
|
||||
from ultralytics.utils.autobatch import check_train_batch_size
|
||||
from ultralytics.utils.checks import check_amp, check_file, check_imgsz, print_args
|
||||
from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
|
||||
from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
|
||||
from ultralytics.utils.files import get_latest_run
|
||||
from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, init_seeds, one_cycle, select_device,
|
||||
|
|
@ -43,7 +41,6 @@ class BaseTrainer:
|
|||
|
||||
Attributes:
|
||||
args (SimpleNamespace): Configuration for the trainer.
|
||||
check_resume (method): Method to check if training should be resumed from a saved checkpoint.
|
||||
validator (BaseValidator): Validator instance.
|
||||
model (nn.Module): Model instance.
|
||||
callbacks (defaultdict): Dictionary of callbacks.
|
||||
|
|
@ -62,6 +59,7 @@ class BaseTrainer:
|
|||
trainset (torch.utils.data.Dataset): Training dataset.
|
||||
testset (torch.utils.data.Dataset): Testing dataset.
|
||||
ema (nn.Module): EMA (Exponential Moving Average) of the model.
|
||||
resume (bool): Resume training from a checkpoint.
|
||||
lf (nn.Module): Loss function.
|
||||
scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
|
||||
best_fitness (float): The best fitness value achieved.
|
||||
|
|
@ -84,7 +82,6 @@ class BaseTrainer:
|
|||
self.check_resume(overrides)
|
||||
self.device = select_device(self.args.device, self.args.batch)
|
||||
self.validator = None
|
||||
self.model = None
|
||||
self.metrics = None
|
||||
self.plots = {}
|
||||
init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)
|
||||
|
|
@ -111,7 +108,7 @@ class BaseTrainer:
|
|||
self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading
|
||||
|
||||
# Model and Dataset
|
||||
self.model = self.args.model
|
||||
self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolov8n -> yolov8n.pt
|
||||
try:
|
||||
if self.args.task == 'classify':
|
||||
self.data = check_cls_dataset(self.args.data)
|
||||
|
|
@ -124,6 +121,7 @@ class BaseTrainer:
|
|||
|
||||
self.trainset, self.testset = self.get_dataset(self.data)
|
||||
self.ema = None
|
||||
self.resume = False
|
||||
|
||||
# Optimization utils init
|
||||
self.lf = None
|
||||
|
|
@ -236,9 +234,9 @@ class BaseTrainer:
|
|||
if RANK > -1 and world_size > 1: # DDP
|
||||
dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None)
|
||||
self.amp = bool(self.amp) # as boolean
|
||||
self.scaler = amp.GradScaler(enabled=self.amp)
|
||||
self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
|
||||
if world_size > 1:
|
||||
self.model = DDP(self.model, device_ids=[RANK])
|
||||
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK])
|
||||
|
||||
# Check imgsz
|
||||
gs = max(int(self.model.stride.max() if hasattr(self.model, 'stride') else 32), 32) # grid size (max stride)
|
||||
|
|
@ -311,11 +309,7 @@ class BaseTrainer:
|
|||
pbar = enumerate(self.train_loader)
|
||||
# Update dataloader attributes (optional)
|
||||
if epoch == (self.epochs - self.args.close_mosaic):
|
||||
LOGGER.info('Closing dataloader mosaic')
|
||||
if hasattr(self.train_loader.dataset, 'mosaic'):
|
||||
self.train_loader.dataset.mosaic = False
|
||||
if hasattr(self.train_loader.dataset, 'close_mosaic'):
|
||||
self.train_loader.dataset.close_mosaic(hyp=self.args)
|
||||
self._close_dataloader_mosaic()
|
||||
self.train_loader.reset()
|
||||
|
||||
if RANK in (-1, 0):
|
||||
|
|
@ -395,7 +389,7 @@ class BaseTrainer:
|
|||
self.epoch_time = tnow - self.epoch_time_start
|
||||
self.epoch_time_start = tnow
|
||||
self.run_callbacks('on_fit_epoch_end')
|
||||
torch.cuda.empty_cache() # clears GPU vRAM at end of epoch, can help with out of memory errors
|
||||
torch.cuda.empty_cache() # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors
|
||||
|
||||
# Early Stopping
|
||||
if RANK != -1: # if DDP training
|
||||
|
|
@ -613,11 +607,15 @@ class BaseTrainer:
|
|||
self.best_fitness = best_fitness
|
||||
self.start_epoch = start_epoch
|
||||
if start_epoch > (self.epochs - self.args.close_mosaic):
|
||||
self._close_dataloader_mosaic()
|
||||
|
||||
def _close_dataloader_mosaic(self):
|
||||
"""Update dataloaders to stop using mosaic augmentation."""
|
||||
if hasattr(self.train_loader.dataset, 'mosaic'):
|
||||
self.train_loader.dataset.mosaic = False
|
||||
if hasattr(self.train_loader.dataset, 'close_mosaic'):
|
||||
LOGGER.info('Closing dataloader mosaic')
|
||||
if hasattr(self.train_loader.dataset, 'mosaic'):
|
||||
self.train_loader.dataset.mosaic = False
|
||||
if hasattr(self.train_loader.dataset, 'close_mosaic'):
|
||||
self.train_loader.dataset.close_mosaic(hyp=self.args)
|
||||
self.train_loader.dataset.close_mosaic(hyp=self.args)
|
||||
|
||||
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -426,6 +426,14 @@ def check_yolov5u_filename(file: str, verbose: bool = True):
|
|||
return file
|
||||
|
||||
|
||||
def check_model_file_from_stem(model='yolov8n'):
    """
    Return a model filename from a valid model stem.

    Args:
        model (str | Path | None): Model name or path. A bare stem such as 'yolov8n' is completed with a '.pt'
            suffix when it is listed in `downloads.GITHUB_ASSETS_STEMS`.

    Returns:
        (Path | Any): `Path(model).with_suffix('.pt')` if `model` is truthy, has no suffix and its stem is a known
            GitHub asset stem; otherwise `model` is returned unchanged.
    """
    if not model:
        return model  # nothing to resolve for None or an empty string

    path = Path(model)
    if not path.suffix and path.stem in downloads.GITHUB_ASSETS_STEMS:
        return path.with_suffix('.pt')  # add suffix, e.g. yolov8n -> yolov8n.pt
    return model
|
||||
|
||||
|
||||
def check_file(file, suffix='', download=True, hard=True):
|
||||
"""Search/download file (if necessary) and return path."""
|
||||
check_suffix(file, suffix) # optional
|
||||
|
|
|
|||
|
|
@ -324,8 +324,8 @@ def scale_image(masks, im0_shape, ratio_pad=None):
|
|||
else:
|
||||
gain = ratio_pad[0][0]
|
||||
pad = ratio_pad[1]
|
||||
top, left = int(pad[1]), int(pad[0]) # y, x
|
||||
bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
|
||||
top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) # y, x
|
||||
bottom, right = (int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)))
|
||||
|
||||
if len(masks.shape) < 2:
|
||||
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
|
||||
|
|
@ -704,8 +704,8 @@ def scale_masks(masks, shape, padding=True):
|
|||
if padding:
|
||||
pad[0] /= 2
|
||||
pad[1] /= 2
|
||||
top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0) # y, x
|
||||
bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
|
||||
top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) if padding else (0, 0) # y, x
|
||||
bottom, right = (int(round(mh - pad[1] + 0.1)), int(round(mw - pad[0] + 0.1)))
|
||||
masks = masks[..., top:bottom, left:right]
|
||||
|
||||
masks = F.interpolate(masks, shape, mode='bilinear', align_corners=False) # NCHW
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue