ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com> Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>
2024-01-10 03:16:08 +01:00 · 2024-01-10 03:16:08 +01:00 · fe27db2f6e
commit fe27db2f6e
parent e795277391
139 changed files with 6870 additions and 5125 deletions
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@ -117,11 +117,11 @@ class BaseMixTransform:
        if self.pre_transform is not None:
            for i, data in enumerate(mix_labels):
                mix_labels[i] = self.pre_transform(data)
-        labels['mix_labels'] = mix_labels
+        labels["mix_labels"] = mix_labels

        # Mosaic or MixUp
        labels = self._mix_transform(labels)
-        labels.pop('mix_labels', None)
+        labels.pop("mix_labels", None)
        return labels

    def _mix_transform(self, labels):
@ -149,8 +149,8 @@ class Mosaic(BaseMixTransform):

    def __init__(self, dataset, imgsz=640, p=1.0, n=4):
        """Initializes the object with a dataset, image size, probability, and border."""
-        assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
-        assert n in (4, 9), 'grid must be equal to 4 or 9.'
+        assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."
+        assert n in (4, 9), "grid must be equal to 4 or 9."
        super().__init__(dataset=dataset, p=p)
        self.dataset = dataset
        self.imgsz = imgsz
@ -166,20 +166,21 @@ class Mosaic(BaseMixTransform):

    def _mix_transform(self, labels):
        """Apply mixup transformation to the input image and labels."""
-        assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.'
-        assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.'
-        return self._mosaic3(labels) if self.n == 3 else self._mosaic4(labels) if self.n == 4 else self._mosaic9(
-            labels)  # This code is modified for mosaic3 method.
+        assert labels.get("rect_shape", None) is None, "rect and mosaic are mutually exclusive."
+        assert len(labels.get("mix_labels", [])), "There are no other images for mosaic augment."
+        return (
+            self._mosaic3(labels) if self.n == 3 else self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)
+        )  # This code is modified for mosaic3 method.

    def _mosaic3(self, labels):
        """Create a 1x3 image mosaic."""
        mosaic_labels = []
        s = self.imgsz
        for i in range(3):
-            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
+            labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]
            # Load image
-            img = labels_patch['img']
-            h, w = labels_patch.pop('resized_shape')
+            img = labels_patch["img"]
+            h, w = labels_patch.pop("resized_shape")

            # Place img in img3
            if i == 0:  # center
@ -194,7 +195,7 @@ class Mosaic(BaseMixTransform):
            padw, padh = c[:2]
            x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords

-            img3[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:]  # img3[ymin:ymax, xmin:xmax]
+            img3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img3[ymin:ymax, xmin:xmax]
            # hp, wp = h, w  # height, width previous for next iteration

            # Labels assuming imgsz*2 mosaic size
@ -202,7 +203,7 @@ class Mosaic(BaseMixTransform):
            mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)

-        final_labels['img'] = img3[-self.border[0]:self.border[0], -self.border[1]:self.border[1]]
+        final_labels["img"] = img3[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]
        return final_labels

    def _mosaic4(self, labels):
@ -211,10 +212,10 @@ class Mosaic(BaseMixTransform):
        s = self.imgsz
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y
        for i in range(4):
-            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
+            labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]
            # Load image
-            img = labels_patch['img']
-            h, w = labels_patch.pop('resized_shape')
+            img = labels_patch["img"]
+            h, w = labels_patch.pop("resized_shape")

            # Place img in img4
            if i == 0:  # top left
@ -238,7 +239,7 @@ class Mosaic(BaseMixTransform):
            labels_patch = self._update_labels(labels_patch, padw, padh)
            mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)
-        final_labels['img'] = img4
+        final_labels["img"] = img4
        return final_labels

    def _mosaic9(self, labels):
@ -247,10 +248,10 @@ class Mosaic(BaseMixTransform):
        s = self.imgsz
        hp, wp = -1, -1  # height, width previous
        for i in range(9):
-            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
+            labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]
            # Load image
-            img = labels_patch['img']
-            h, w = labels_patch.pop('resized_shape')
+            img = labels_patch["img"]
+            h, w = labels_patch.pop("resized_shape")

            # Place img in img9
            if i == 0:  # center
@ -278,7 +279,7 @@ class Mosaic(BaseMixTransform):
            x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords

            # Image
-            img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:]  # img9[ymin:ymax, xmin:xmax]
+            img9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img9[ymin:ymax, xmin:xmax]
            hp, wp = h, w  # height, width previous for next iteration

            # Labels assuming imgsz*2 mosaic size
@ -286,16 +287,16 @@ class Mosaic(BaseMixTransform):
            mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)

-        final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]]
+        final_labels["img"] = img9[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]
        return final_labels

    @staticmethod
    def _update_labels(labels, padw, padh):
        """Update labels."""
-        nh, nw = labels['img'].shape[:2]
-        labels['instances'].convert_bbox(format='xyxy')
-        labels['instances'].denormalize(nw, nh)
-        labels['instances'].add_padding(padw, padh)
+        nh, nw = labels["img"].shape[:2]
+        labels["instances"].convert_bbox(format="xyxy")
+        labels["instances"].denormalize(nw, nh)
+        labels["instances"].add_padding(padw, padh)
        return labels

    def _cat_labels(self, mosaic_labels):
@ -306,18 +307,20 @@ class Mosaic(BaseMixTransform):
        instances = []
        imgsz = self.imgsz * 2  # mosaic imgsz
        for labels in mosaic_labels:
-            cls.append(labels['cls'])
-            instances.append(labels['instances'])
+            cls.append(labels["cls"])
+            instances.append(labels["instances"])
+        # Final labels
        final_labels = {
-            'im_file': mosaic_labels[0]['im_file'],
-            'ori_shape': mosaic_labels[0]['ori_shape'],
-            'resized_shape': (imgsz, imgsz),
-            'cls': np.concatenate(cls, 0),
-            'instances': Instances.concatenate(instances, axis=0),
-            'mosaic_border': self.border}  # final_labels
-        final_labels['instances'].clip(imgsz, imgsz)
-        good = final_labels['instances'].remove_zero_area_boxes()
-        final_labels['cls'] = final_labels['cls'][good]
+            "im_file": mosaic_labels[0]["im_file"],
+            "ori_shape": mosaic_labels[0]["ori_shape"],
+            "resized_shape": (imgsz, imgsz),
+            "cls": np.concatenate(cls, 0),
+            "instances": Instances.concatenate(instances, axis=0),
+            "mosaic_border": self.border,
+        }
+        final_labels["instances"].clip(imgsz, imgsz)
+        good = final_labels["instances"].remove_zero_area_boxes()
+        final_labels["cls"] = final_labels["cls"][good]
        return final_labels


@ -335,10 +338,10 @@ class MixUp(BaseMixTransform):
    def _mix_transform(self, labels):
        """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
        r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
-        labels2 = labels['mix_labels'][0]
-        labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
-        labels['instances'] = Instances.concatenate([labels['instances'], labels2['instances']], axis=0)
-        labels['cls'] = np.concatenate([labels['cls'], labels2['cls']], 0)
+        labels2 = labels["mix_labels"][0]
+        labels["img"] = (labels["img"] * r + labels2["img"] * (1 - r)).astype(np.uint8)
+        labels["instances"] = Instances.concatenate([labels["instances"], labels2["instances"]], axis=0)
+        labels["cls"] = np.concatenate([labels["cls"], labels2["cls"]], 0)
        return labels


@ -366,14 +369,9 @@ class RandomPerspective:
        box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
    """

-    def __init__(self,
-                 degrees=0.0,
-                 translate=0.1,
-                 scale=0.5,
-                 shear=0.0,
-                 perspective=0.0,
-                 border=(0, 0),
-                 pre_transform=None):
+    def __init__(
+        self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0), pre_transform=None
+    ):
        """Initializes RandomPerspective object with transformation parameters."""

        self.degrees = degrees
@ -519,18 +517,18 @@ class RandomPerspective:
        Args:
            labels (dict): a dict of `bboxes`, `segments`, `keypoints`.
        """
-        if self.pre_transform and 'mosaic_border' not in labels:
+        if self.pre_transform and "mosaic_border" not in labels:
            labels = self.pre_transform(labels)
-        labels.pop('ratio_pad', None)  # do not need ratio pad
+        labels.pop("ratio_pad", None)  # do not need ratio pad

-        img = labels['img']
-        cls = labels['cls']
-        instances = labels.pop('instances')
+        img = labels["img"]
+        cls = labels["cls"]
+        instances = labels.pop("instances")
        # Make sure the coord formats are right
-        instances.convert_bbox(format='xyxy')
+        instances.convert_bbox(format="xyxy")
        instances.denormalize(*img.shape[:2][::-1])

-        border = labels.pop('mosaic_border', self.border)
+        border = labels.pop("mosaic_border", self.border)
        self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
        # M is affine matrix
        # Scale for func:`box_candidates`
@ -546,20 +544,20 @@ class RandomPerspective:

        if keypoints is not None:
            keypoints = self.apply_keypoints(keypoints, M)
-        new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False)
+        new_instances = Instances(bboxes, segments, keypoints, bbox_format="xyxy", normalized=False)
        # Clip
        new_instances.clip(*self.size)

        # Filter instances
        instances.scale(scale_w=scale, scale_h=scale, bbox_only=True)
        # Make the bboxes have the same scale with new_bboxes
-        i = self.box_candidates(box1=instances.bboxes.T,
-                                box2=new_instances.bboxes.T,
-                                area_thr=0.01 if len(segments) else 0.10)
-        labels['instances'] = new_instances[i]
-        labels['cls'] = cls[i]
-        labels['img'] = img
-        labels['resized_shape'] = img.shape[:2]
+        i = self.box_candidates(
+            box1=instances.bboxes.T, box2=new_instances.bboxes.T, area_thr=0.01 if len(segments) else 0.10
+        )
+        labels["instances"] = new_instances[i]
+        labels["cls"] = cls[i]
+        labels["img"] = img
+        labels["resized_shape"] = img.shape[:2]
        return labels

    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
@ -611,7 +609,7 @@ class RandomHSV:

        The modified image replaces the original image in the input 'labels' dict.
        """
-        img = labels['img']
+        img = labels["img"]
        if self.hgain or self.sgain or self.vgain:
            r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
            hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
@ -634,7 +632,7 @@ class RandomFlip:
    Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
    """

-    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
+    def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None:
        """
        Initializes the RandomFlip class with probability and direction.

@ -644,7 +642,7 @@ class RandomFlip:
                Default is 'horizontal'.
            flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
        """
-        assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
+        assert direction in ["horizontal", "vertical"], f"Support direction `horizontal` or `vertical`, got {direction}"
        assert 0 <= p <= 1.0

        self.p = p
@ -662,25 +660,25 @@ class RandomFlip:
        Returns:
            (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
        """
-        img = labels['img']
-        instances = labels.pop('instances')
-        instances.convert_bbox(format='xywh')
+        img = labels["img"]
+        instances = labels.pop("instances")
+        instances.convert_bbox(format="xywh")
        h, w = img.shape[:2]
        h = 1 if instances.normalized else h
        w = 1 if instances.normalized else w

        # Flip up-down
-        if self.direction == 'vertical' and random.random() < self.p:
+        if self.direction == "vertical" and random.random() < self.p:
            img = np.flipud(img)
            instances.flipud(h)
-        if self.direction == 'horizontal' and random.random() < self.p:
+        if self.direction == "horizontal" and random.random() < self.p:
            img = np.fliplr(img)
            instances.fliplr(w)
            # For keypoints
            if self.flip_idx is not None and instances.keypoints is not None:
                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
-        labels['img'] = np.ascontiguousarray(img)
-        labels['instances'] = instances
+        labels["img"] = np.ascontiguousarray(img)
+        labels["instances"] = instances
        return labels


@ -700,9 +698,9 @@ class LetterBox:
        """Return updated labels and image with added border."""
        if labels is None:
            labels = {}
-        img = labels.get('img') if image is None else image
+        img = labels.get("img") if image is None else image
        shape = img.shape[:2]  # current shape [height, width]
-        new_shape = labels.pop('rect_shape', self.new_shape)
+        new_shape = labels.pop("rect_shape", self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

@ -730,25 +728,26 @@ class LetterBox:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
-        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
-                                 value=(114, 114, 114))  # add border
-        if labels.get('ratio_pad'):
-            labels['ratio_pad'] = (labels['ratio_pad'], (left, top))  # for evaluation
+        img = cv2.copyMakeBorder(
+            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
+        )  # add border
+        if labels.get("ratio_pad"):
+            labels["ratio_pad"] = (labels["ratio_pad"], (left, top))  # for evaluation

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
-            labels['img'] = img
-            labels['resized_shape'] = new_shape
+            labels["img"] = img
+            labels["resized_shape"] = new_shape
            return labels
        else:
            return img

    def _update_labels(self, labels, ratio, padw, padh):
        """Update labels."""
-        labels['instances'].convert_bbox(format='xyxy')
-        labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
-        labels['instances'].scale(*ratio)
-        labels['instances'].add_padding(padw, padh)
+        labels["instances"].convert_bbox(format="xyxy")
+        labels["instances"].denormalize(*labels["img"].shape[:2][::-1])
+        labels["instances"].scale(*ratio)
+        labels["instances"].add_padding(padw, padh)
        return labels


@ -785,11 +784,11 @@ class CopyPaste:
            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
            2. This method modifies the input dictionary 'labels' in place.
        """
-        im = labels['img']
-        cls = labels['cls']
+        im = labels["img"]
+        cls = labels["cls"]
        h, w = im.shape[:2]
-        instances = labels.pop('instances')
-        instances.convert_bbox(format='xyxy')
+        instances = labels.pop("instances")
+        instances.convert_bbox(format="xyxy")
        instances.denormalize(w, h)
        if self.p and len(instances.segments):
            n = len(instances)
@ -812,9 +811,9 @@ class CopyPaste:
            i = cv2.flip(im_new, 1).astype(bool)
            im[i] = result[i]

-        labels['img'] = im
-        labels['cls'] = cls
-        labels['instances'] = instances
+        labels["img"] = im
+        labels["cls"] = cls
+        labels["instances"] = instances
        return labels


@ -831,12 +830,13 @@ class Albumentations:
        """Initialize the transform object for YOLO bbox formatted params."""
        self.p = p
        self.transform = None
-        prefix = colorstr('albumentations: ')
+        prefix = colorstr("albumentations: ")
        try:
            import albumentations as A

-            check_version(A.__version__, '1.0.3', hard=True)  # version requirement
+            check_version(A.__version__, "1.0.3", hard=True)  # version requirement

+            # Transforms
            T = [
                A.Blur(p=0.01),
                A.MedianBlur(p=0.01),
@ -844,31 +844,32 @@ class Albumentations:
                A.CLAHE(p=0.01),
                A.RandomBrightnessContrast(p=0.0),
                A.RandomGamma(p=0.0),
-                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
-            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
+                A.ImageCompression(quality_lower=75, p=0.0),
+            ]
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))

-            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
+            LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
-            LOGGER.info(f'{prefix}{e}')
+            LOGGER.info(f"{prefix}{e}")

    def __call__(self, labels):
        """Generates object detections and returns a dictionary with detection results."""
-        im = labels['img']
-        cls = labels['cls']
+        im = labels["img"]
+        cls = labels["cls"]
        if len(cls):
-            labels['instances'].convert_bbox('xywh')
-            labels['instances'].normalize(*im.shape[:2][::-1])
-            bboxes = labels['instances'].bboxes
+            labels["instances"].convert_bbox("xywh")
+            labels["instances"].normalize(*im.shape[:2][::-1])
+            bboxes = labels["instances"].bboxes
            # TODO: add supports of segments and keypoints
            if self.transform and random.random() < self.p:
                new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
-                if len(new['class_labels']) > 0:  # skip update if no bbox in new im
-                    labels['img'] = new['image']
-                    labels['cls'] = np.array(new['class_labels'])
-                    bboxes = np.array(new['bboxes'], dtype=np.float32)
-            labels['instances'].update(bboxes=bboxes)
+                if len(new["class_labels"]) > 0:  # skip update if no bbox in new im
+                    labels["img"] = new["image"]
+                    labels["cls"] = np.array(new["class_labels"])
+                    bboxes = np.array(new["bboxes"], dtype=np.float32)
+            labels["instances"].update(bboxes=bboxes)
        return labels


@ -888,15 +889,17 @@ class Format:
        batch_idx (bool): Keep batch indexes. Default is True.
    """

-    def __init__(self,
-                 bbox_format='xywh',
-                 normalize=True,
-                 return_mask=False,
-                 return_keypoint=False,
-                 return_obb=False,
-                 mask_ratio=4,
-                 mask_overlap=True,
-                 batch_idx=True):
+    def __init__(
+        self,
+        bbox_format="xywh",
+        normalize=True,
+        return_mask=False,
+        return_keypoint=False,
+        return_obb=False,
+        mask_ratio=4,
+        mask_overlap=True,
+        batch_idx=True,
+    ):
        """Initializes the Format class with given parameters."""
        self.bbox_format = bbox_format
        self.normalize = normalize
@ -909,10 +912,10 @@ class Format:

    def __call__(self, labels):
        """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
-        img = labels.pop('img')
+        img = labels.pop("img")
        h, w = img.shape[:2]
-        cls = labels.pop('cls')
-        instances = labels.pop('instances')
+        cls = labels.pop("cls")
+        instances = labels.pop("instances")
        instances.convert_bbox(format=self.bbox_format)
        instances.denormalize(w, h)
        nl = len(instances)
@ -922,22 +925,24 @@ class Format:
                masks, instances, cls = self._format_segments(instances, cls, w, h)
                masks = torch.from_numpy(masks)
            else:
-                masks = torch.zeros(1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio,
-                                    img.shape[1] // self.mask_ratio)
-            labels['masks'] = masks
+                masks = torch.zeros(
+                    1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
+                )
+            labels["masks"] = masks
        if self.normalize:
            instances.normalize(w, h)
-        labels['img'] = self._format_img(img)
-        labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
-        labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
+        labels["img"] = self._format_img(img)
+        labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl)
+        labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
        if self.return_keypoint:
-            labels['keypoints'] = torch.from_numpy(instances.keypoints)
+            labels["keypoints"] = torch.from_numpy(instances.keypoints)
        if self.return_obb:
-            labels['bboxes'] = xyxyxyxy2xywhr(torch.from_numpy(instances.segments)) if len(
-                instances.segments) else torch.zeros((0, 5))
+            labels["bboxes"] = (
+                xyxyxyxy2xywhr(torch.from_numpy(instances.segments)) if len(instances.segments) else torch.zeros((0, 5))
+            )
        # Then we can use collate_fn
        if self.batch_idx:
-            labels['batch_idx'] = torch.zeros(nl)
+            labels["batch_idx"] = torch.zeros(nl)
        return labels

    def _format_img(self, img):
@ -964,33 +969,39 @@ class Format:

 def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """Convert images to a size suitable for YOLOv8 training."""
-    pre_transform = Compose([
-        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
-        CopyPaste(p=hyp.copy_paste),
-        RandomPerspective(
-            degrees=hyp.degrees,
-            translate=hyp.translate,
-            scale=hyp.scale,
-            shear=hyp.shear,
-            perspective=hyp.perspective,
-            pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
-        )])
-    flip_idx = dataset.data.get('flip_idx', [])  # for keypoints augmentation
+    pre_transform = Compose(
+        [
+            Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
+            CopyPaste(p=hyp.copy_paste),
+            RandomPerspective(
+                degrees=hyp.degrees,
+                translate=hyp.translate,
+                scale=hyp.scale,
+                shear=hyp.shear,
+                perspective=hyp.perspective,
+                pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
+            ),
+        ]
+    )
+    flip_idx = dataset.data.get("flip_idx", [])  # for keypoints augmentation
    if dataset.use_keypoints:
-        kpt_shape = dataset.data.get('kpt_shape', None)
+        kpt_shape = dataset.data.get("kpt_shape", None)
        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
            hyp.fliplr = 0.0
            LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'")
        elif flip_idx and (len(flip_idx) != kpt_shape[0]):
-            raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}')
+            raise ValueError(f"data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}")

-    return Compose([
-        pre_transform,
-        MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
-        Albumentations(p=1.0),
-        RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
-        RandomFlip(direction='vertical', p=hyp.flipud),
-        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms
+    return Compose(
+        [
+            pre_transform,
+            MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
+            Albumentations(p=1.0),
+            RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
+            RandomFlip(direction="vertical", p=hyp.flipud),
+            RandomFlip(direction="horizontal", p=hyp.fliplr, flip_idx=flip_idx),
+        ]
+    )  # transforms


 # Classification augmentations -----------------------------------------------------------------------------------------
@ -1031,10 +1042,13 @@ def classify_transforms(
        tfl = [T.Resize(scale_size)]
    tfl += [T.CenterCrop(size)]

-    tfl += [T.ToTensor(), T.Normalize(
-        mean=torch.tensor(mean),
-        std=torch.tensor(std),
-    )]
+    tfl += [
+        T.ToTensor(),
+        T.Normalize(
+            mean=torch.tensor(mean),
+            std=torch.tensor(std),
+        ),
+    ]

    return T.Compose(tfl)

@ -1053,7 +1067,7 @@ def classify_augmentations(
    hsv_s=0.4,  # image HSV-Saturation augmentation (fraction)
    hsv_v=0.4,  # image HSV-Value augmentation (fraction)
    force_color_jitter=False,
-    erasing=0.,
+    erasing=0.0,
    interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR,
 ):
    """
@ -1080,13 +1094,13 @@ def classify_augmentations(
    """
    # Transforms to apply if albumentations not installed
    if not isinstance(size, int):
-        raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)')
+        raise TypeError(f"classify_transforms() size {size} must be integer, not (list, tuple)")
    scale = tuple(scale or (0.08, 1.0))  # default imagenet scale range
-    ratio = tuple(ratio or (3. / 4., 4. / 3.))  # default imagenet ratio range
+    ratio = tuple(ratio or (3.0 / 4.0, 4.0 / 3.0))  # default imagenet ratio range
    primary_tfl = [T.RandomResizedCrop(size, scale=scale, ratio=ratio, interpolation=interpolation)]
-    if hflip > 0.:
+    if hflip > 0.0:
        primary_tfl += [T.RandomHorizontalFlip(p=hflip)]
-    if vflip > 0.:
+    if vflip > 0.0:
        primary_tfl += [T.RandomVerticalFlip(p=vflip)]

    secondary_tfl = []
@ -1097,27 +1111,29 @@ def classify_augmentations(
        # this allows override without breaking old hparm cfgs
        disable_color_jitter = not force_color_jitter

-        if auto_augment == 'randaugment':
+        if auto_augment == "randaugment":
            if TORCHVISION_0_11:
                secondary_tfl += [T.RandAugment(interpolation=interpolation)]
            else:
                LOGGER.warning('"auto_augment=randaugment" requires torchvision >= 0.11.0. Disabling it.')

-        elif auto_augment == 'augmix':
+        elif auto_augment == "augmix":
            if TORCHVISION_0_13:
                secondary_tfl += [T.AugMix(interpolation=interpolation)]
            else:
                LOGGER.warning('"auto_augment=augmix" requires torchvision >= 0.13.0. Disabling it.')

-        elif auto_augment == 'autoaugment':
+        elif auto_augment == "autoaugment":
            if TORCHVISION_0_10:
                secondary_tfl += [T.AutoAugment(interpolation=interpolation)]
            else:
                LOGGER.warning('"auto_augment=autoaugment" requires torchvision >= 0.10.0. Disabling it.')

        else:
-            raise ValueError(f'Invalid auto_augment policy: {auto_augment}. Should be one of "randaugment", '
-                             f'"augmix", "autoaugment" or None')
+            raise ValueError(
+                f'Invalid auto_augment policy: {auto_augment}. Should be one of "randaugment", '
+                f'"augmix", "autoaugment" or None'
+            )

    if not disable_color_jitter:
        secondary_tfl += [T.ColorJitter(brightness=hsv_v, contrast=hsv_v, saturation=hsv_s, hue=hsv_h)]
@ -1125,7 +1141,8 @@ def classify_augmentations(
    final_tfl = [
        T.ToTensor(),
        T.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)),
-        T.RandomErasing(p=erasing, inplace=True)]
+        T.RandomErasing(p=erasing, inplace=True),
+    ]

    return T.Compose(primary_tfl + secondary_tfl + final_tfl)

@ -1177,7 +1194,7 @@ class ClassifyLetterBox:

        # Create padded image
        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
-        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
+        im_out[top : top + h, left : left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
        return im_out


@ -1205,7 +1222,7 @@ class CenterCrop:
        imh, imw = im.shape[:2]
        m = min(imh, imw)  # min dimension
        top, left = (imh - m) // 2, (imw - m) // 2
-        return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
+        return cv2.resize(im[top : top + m, left : left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)


 # NOTE: keep this class for backward compatibility