ultralytics 8.0.167 Tuner updates and HUB Pose and Classify fixes (#4656)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Author: Glenn Jocher
Date:   2023-08-31 01:38:42 +02:00 (committed by GitHub)
Parent: 8596ee241f
Commit: d2cf7acce0
21 changed files with 174 additions and 144 deletions


@@ -438,18 +438,16 @@ class Exporter:
            Path(asset).unlink()  # delete zip
            pnnx.chmod(0o777)  # set read, write, and execute permissions for everyone

-        use_ncnn = True
        ncnn_args = [
            f'ncnnparam={f / "model.ncnn.param"}',
            f'ncnnbin={f / "model.ncnn.bin"}',
-            f'ncnnpy={f / "model_ncnn.py"}', ] if use_ncnn else []
+            f'ncnnpy={f / "model_ncnn.py"}', ]

-        use_pnnx = False
        pnnx_args = [
            f'pnnxparam={f / "model.pnnx.param"}',
            f'pnnxbin={f / "model.pnnx.bin"}',
            f'pnnxpy={f / "model_pnnx.py"}',
-            f'pnnxonnx={f / "model.pnnx.onnx"}', ] if use_pnnx else []
+            f'pnnxonnx={f / "model.pnnx.onnx"}', ]

        cmd = [
            str(pnnx),
@@ -462,7 +460,10 @@ class Exporter:
        f.mkdir(exist_ok=True)  # make ncnn_model directory
        LOGGER.info(f"{prefix} running '{' '.join(cmd)}'")
        subprocess.run(cmd, check=True)

-        for f_debug in 'debug.bin', 'debug.param', 'debug2.bin', 'debug2.param':  # remove debug files
+        # Remove debug files
+        pnnx_files = [x.split('=')[-1] for x in pnnx_args]
+        for f_debug in ('debug.bin', 'debug.param', 'debug2.bin', 'debug2.param', *pnnx_files):
            Path(f_debug).unlink(missing_ok=True)

        yaml_save(f / 'metadata.yaml', self.metadata)  # add metadata.yaml
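The cleanup above derives the generated file names directly from the argument strings rather than hard-coding them. A minimal standalone sketch of the pattern, using hypothetical file names:

```python
from pathlib import Path

# Hypothetical pnnx-style arguments of the form 'key=path'
pnnx_args = ['pnnxparam=model.pnnx.param', 'pnnxbin=model.pnnx.bin']

# Everything after the '=' is the output file path
pnnx_files = [x.split('=')[-1] for x in pnnx_args]

for f_debug in ('debug.bin', 'debug.param', *pnnx_files):
    Path(f_debug).unlink(missing_ok=True)  # missing_ok avoids FileNotFoundError
```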


@@ -341,7 +341,8 @@ class Model:
            self.trainer.train()
        # Update model and cfg after training
        if RANK in (-1, 0):
-            self.model, _ = attempt_load_one_weight(str(self.trainer.best))
+            ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
+            self.model, _ = attempt_load_one_weight(ckpt)
            self.overrides = self.model.args
            self.metrics = getattr(self.trainer.validator, 'metrics', None)  # TODO: no metrics returned by DDP
        return self.metrics
@@ -360,9 +361,9 @@ class Model:
        else:
            from .tuner import Tuner

-            custom = {}  # method defaults
+            custom = {'plots': False, 'save': False}  # method defaults
            args = {**self.overrides, **custom, **kwargs, 'mode': 'train'}  # highest priority args on the right
-            return Tuner(args=args, _callbacks=self.callbacks)(model=self.model, iterations=iterations)
+            return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)

    def to(self, device):
        """

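Both changes above follow the same defensive idea: `train()` now falls back to `last.pt` when early stopping means `best.pt` was never written, and `tune()` merges argument dicts so the right-most source wins. A minimal sketch of the two mechanisms, with hypothetical paths and values:

```python
from pathlib import Path

# Checkpoint fallback: best.pt may be absent if training stopped early
best, last = Path('weights/best.pt'), Path('weights/last.pt')  # hypothetical paths
ckpt = best if best.exists() else last

# Priority merge: user kwargs override method defaults, which override instance overrides
overrides = {'imgsz': 640, 'plots': True}  # from the Model instance
custom = {'plots': False, 'save': False}   # tune() method defaults
kwargs = {'epochs': 10, 'save': True}      # user-supplied
args = {**overrides, **custom, **kwargs, 'mode': 'train'}
assert args == {'imgsz': 640, 'plots': False, 'save': True, 'epochs': 10, 'mode': 'train'}
```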

@@ -115,7 +115,7 @@ class BaseTrainer:
        try:
            if self.args.task == 'classify':
                self.data = check_cls_dataset(self.args.data)
-            elif self.args.data.split('.')[-1] in ('yaml', 'yml') or self.args.task in ('detect', 'segment'):
+            elif self.args.data.split('.')[-1] in ('yaml', 'yml') or self.args.task in ('detect', 'segment', 'pose'):
                self.data = check_det_dataset(self.args.data)
                if 'yaml_file' in self.data:
                    self.args.data = self.data['yaml_file']  # for validating 'yolo train data=url.zip' usage
@@ -251,9 +251,8 @@ class BaseTrainer:
        self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)

        # Batch size
-        if self.batch_size == -1:
-            if RANK == -1:  # single-GPU only, estimate best batch size
-                self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)
+        if self.batch_size == -1 and RANK == -1:  # single-GPU only, estimate best batch size
+            self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)

        # Dataloaders
        batch_size = self.batch_size // max(world_size, 1)
@@ -262,7 +261,7 @@ class BaseTrainer:
            self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val')
            self.validator = self.get_validator()
            metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
-            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))  # TODO: init metrics for plot_results()?
+            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
            self.ema = ModelEMA(self.model)
            if self.args.plots:
                self.plot_training_labels()
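Collapsing the nested `if` keeps auto-batch estimation restricted to single-GPU runs (`RANK == -1` means no DDP), since every DDP rank must use the same explicit batch size. A sketch of the control flow, with a hypothetical `estimate_batch_size` stand-in for `check_train_batch_size`:

```python
def resolve_batch_size(batch_size: int, rank: int) -> int:
    """Return an explicit batch size; -1 requests auto-estimation (single-GPU only)."""
    if batch_size == -1 and rank == -1:  # combined condition replaces the nested ifs
        batch_size = estimate_batch_size()
    return batch_size


def estimate_batch_size() -> int:
    """Hypothetical stand-in; the real helper profiles CUDA memory usage."""
    return 16


assert resolve_batch_size(-1, rank=-1) == 16  # single GPU: auto-estimate
assert resolve_batch_size(-1, rank=0) == -1   # DDP: left unresolved here
assert resolve_batch_size(32, rank=-1) == 32  # explicit value passes through
```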


@@ -18,6 +18,7 @@ Example:
"""

import random
import time
+from copy import deepcopy

import numpy as np
@@ -51,7 +52,7 @@ class Tuner:
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
-        model.tune(data='coco8.yaml', imgsz=640, epochs=100, iterations=10)
+        model.tune(data='coco8.yaml', imgsz=640, epochs=100, iterations=10, val=False, cache=True)
        ```
    """
@@ -63,11 +64,11 @@ class Tuner:
            args (dict, optional): Configuration for hyperparameter evolution.
        """
        self.args = get_cfg(overrides=args)
-        self.space = {
+        self.space = {  # key: (min, max, gain(optional))
            # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
            'lr0': (1e-5, 1e-1),
            'lrf': (0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
-            'momentum': (0.6, 0.98),  # SGD momentum/Adam beta1
+            'momentum': (0.6, 0.98, 0.3),  # SGD momentum/Adam beta1
            'weight_decay': (0.0, 0.001),  # optimizer weight decay 5e-4
            'warmup_epochs': (0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (0.0, 0.95),  # warmup initial momentum
@@ -86,13 +87,13 @@ class Tuner:
            'mosaic': (0.0, 1.0),  # image mosaic (probability)
            'mixup': (0.0, 1.0),  # image mixup (probability)
            'copy_paste': (0.0, 1.0)}  # segment copy-paste (probability)
-        self.tune_dir = get_save_dir(self.args, name='tune')
+        self.tune_dir = get_save_dir(self.args, name='_tune')
        self.evolve_csv = self.tune_dir / 'evolve.csv'
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        callbacks.add_integration_callbacks(self)
        LOGGER.info(f"Initialized Tuner instance with 'tune_dir={self.tune_dir}'.")
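Each search-space value is now a 2- or 3-tuple `(min, max[, gain])`, where the optional gain scales the mutation step for that hyperparameter. A small sketch of how the tuple is read back out, mirroring the `_mutate` logic below:

```python
space = {
    'lr0': (1e-5, 1e-1),          # (min, max): default gain of 1.0
    'momentum': (0.6, 0.98, 0.3)}  # (min, max, gain): damped mutation steps

gains = {k: (v[2] if len(v) == 3 else 1.0) for k, v in space.items()}
assert gains == {'lr0': 1.0, 'momentum': 0.3}
```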
-    def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2, return_best=False):
+    def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2):
        """
        Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`.

@@ -111,10 +112,7 @@ class Tuner:
            fitness = x[:, 0]  # first column
            n = min(n, len(x))  # number of previous results to consider
            x = x[np.argsort(-fitness)][:n]  # top n mutations
-            if return_best:
-                return {k: float(x[0, i + 1]) for i, k in enumerate(self.space.keys())}
-            fitness = x[:, 0]  # first column
-            w = fitness - fitness.min() + 1E-6  # weights (sum > 0)
+            w = x[:, 0] - x[:, 0].min() + 1E-6  # weights (sum > 0)
            if parent == 'single' or len(x) == 1:
                # x = x[random.randint(0, n - 1)]  # random selection
                x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
@@ -124,7 +122,7 @@ class Tuner:
            # Mutate
            r = np.random  # method
            r.seed(int(time.time()))
-            g = np.array([self.space[k][0] for k in self.space.keys()])  # gains 0-1
+            g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()])  # gains 0-1
            ng = len(self.space)
            v = np.ones(ng)
            while all(v == 1):  # mutate until a change occurs (prevent duplicates)
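Parent selection weights the top-n rows by their fitness offset from the minimum (keeping every weight positive), then a clipped Gaussian factor perturbs each hyperparameter. A simplified, self-contained sketch of the two steps:

```python
import random

import numpy as np

# Rows: (fitness, hyperparameter value); hypothetical top-n results
x = np.array([[0.70, 0.010], [0.65, 0.020], [0.50, 0.030]])
w = x[:, 0] - x[:, 0].min() + 1e-6  # weights (sum > 0)
parent = x[random.choices(range(len(x)), weights=w)[0]]  # weighted selection

g, sigma, mutation = np.array([1.0]), 0.2, 0.8  # gain, step size, mutation probability
r, ng = np.random, 1
v = np.ones(ng)
while all(v == 1):  # retry until at least one factor differs from 1.0
    v = (g * (r.random(ng) < mutation) * r.randn(ng) * r.random() * sigma + 1).clip(0.3, 3.0)
mutated = parent[1:] * v  # scale the parent's hyperparameters
```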
@@ -152,7 +150,7 @@ class Tuner:
        4. Log the fitness score and mutated hyperparameters to a CSV file.

        Args:
-           model (YOLO): A pre-initialized YOLO model to be used for training.
+           model (Model): A pre-initialized YOLO model to be used for training.
           iterations (int): The number of generations to run the evolution for.

        Note:
@@ -160,6 +158,7 @@ class Tuner:
           Ensure this path is set correctly in the Tuner instance.
        """
+        t0 = time.time()
        self.tune_dir.mkdir(parents=True, exist_ok=True)
        for i in range(iterations):
            # Mutate hyperparameters
@@ -167,17 +166,27 @@ class Tuner:
            LOGGER.info(f'{prefix} Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}')

            # Initialize and train YOLOv8 model
-            model = YOLO('yolov8n.pt')
-            train_args = {**vars(self.args), **mutated_hyp}
-            results = model.train(**train_args)
+            try:
+                train_args = {**vars(self.args), **mutated_hyp}
+                fitness = (deepcopy(model) or YOLO(self.args.model)).train(**train_args).fitness  # results.fitness
+            except Exception as e:
+                LOGGER.warning(f'WARNING ❌️ training failure for hyperparameter tuning iteration {i}\n{e}')
+                fitness = 0.0

            # Save results and mutated_hyp to evolve_csv
+            log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
            headers = '' if self.evolve_csv.exists() else (','.join(['fitness_score'] + list(self.space.keys())) + '\n')
-            log_row = [results.fitness] + [mutated_hyp[k] for k in self.space.keys()]
            with open(self.evolve_csv, 'a') as f:
                f.write(headers + ','.join(map(str, log_row)) + '\n')

-        LOGGER.info(f'{prefix} All iterations complete. Results saved to {colorstr("bold", self.tune_dir)}')
-        best_hyp = self._mutate(return_best=True)  # best hyps
-        yaml_save(self.tune_dir / 'best.yaml', best_hyp)
+        # Print tuning results
+        x = np.loadtxt(self.evolve_csv, ndmin=2, delimiter=',', skiprows=1)
+        fitness = x[:, 0]  # first column
+        i = np.argsort(-fitness)[0]  # best fitness index
+        LOGGER.info(f'\n{prefix} All iterations complete ✅ ({time.time() - t0:.2f}s)\n'
+                    f'{prefix} Results saved to {colorstr("bold", self.tune_dir)}\n'
+                    f'{prefix} Best fitness={fitness[i]} observed at iteration {i}')

+        # Save tuning results
+        yaml_save(self.tune_dir / 'best.yaml', data={k: float(x[i, j + 1]) for j, k in enumerate(self.space.keys())})
+        yaml_print(self.tune_dir / 'best.yaml')
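The reporting step reads `evolve.csv` back with NumPy and recovers the best row by sorting on the fitness column. A small sketch with a hypothetical two-iteration file:

```python
import numpy as np

with open('evolve.csv', 'w') as f:  # hypothetical results file
    f.write('fitness_score,lr0,momentum\n0.41,0.01,0.9\n0.55,0.02,0.8\n')

x = np.loadtxt('evolve.csv', ndmin=2, delimiter=',', skiprows=1)
fitness = x[:, 0]                 # first column
i = int(np.argsort(-fitness)[0])  # index of the best row
keys = ['lr0', 'momentum']
best = {k: float(x[i, j + 1]) for j, k in enumerate(keys)}  # j + 1 skips the fitness column
assert i == 1 and best == {'lr0': 0.02, 'momentum': 0.8}
```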


@@ -111,12 +111,12 @@ class BaseValidator:
        if self.training:
            self.device = trainer.device
            self.data = trainer.data
-            model = trainer.ema.ema or trainer.model
            self.args.half = self.device.type != 'cpu'  # force FP16 val during training
+            model = trainer.ema.ema or trainer.model
            model = model.half() if self.args.half else model.float()
-            self.model = model
+            # self.model = model
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
-            self.args.plots = trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
+            self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
            model.eval()
        else:
            callbacks.add_integration_callbacks(self)
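Switching `=` to `&=` means the trainer can only turn plotting off, never re-enable it over a user's `plots=False`; boolean `&` yields True only when both sides are truthy:

```python
args_plots = False         # user disabled plotting
final_epoch = True
args_plots &= final_epoch  # stays False: &= can only turn plots off
assert args_plots is False

args_plots = True
args_plots &= final_epoch  # True only when both sides agree
assert args_plots is True
```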
@@ -126,7 +126,7 @@ class BaseValidator:
                                       dnn=self.args.dnn,
                                       data=self.args.data,
                                       fp16=self.args.half)
-            self.model = model
+            # self.model = model
            self.device = model.device  # update device
            self.args.half = model.fp16  # update half
            stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
@@ -297,8 +297,7 @@ class BaseValidator:
    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)"""
-        path = Path(name)
-        self.plots[path] = {'data': data, 'timestamp': time.time()}
+        self.plots[Path(name)] = {'data': data, 'timestamp': time.time()}

    # TODO: may need to put these following functions into callback
    def plot_val_samples(self, batch, ni):