ultralytics 8.2.38 official YOLOv10 support (#13113)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
2024-06-20 14:31:48 -04:00 · 2024-06-20 14:31:48 -04:00 · ffb46fd7fb
commit ffb46fd7fb
parent 821e5fa477
23 changed files with 785 additions and 32 deletions
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -15,6 +15,7 @@ from ultralytics.nn.modules import (
    C3TR,
    ELAN1,
    OBB,
+    PSA,
    SPP,
    SPPELAN,
    SPPF,
@@ -24,6 +25,7 @@ from ultralytics.nn.modules import (
    BottleneckCSP,
    C2f,
    C2fAttn,
+    C2fCIB,
    C3Ghost,
    C3x,
    CBFuse,
@@ -46,14 +48,24 @@ from ultralytics.nn.modules import (
    RepC3,
    RepConv,
    RepNCSPELAN4,
+    RepVGGDW,
    ResNetLayer,
    RTDETRDecoder,
+    SCDown,
    Segment,
    WorldDetect,
+    v10Detect,
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
-from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss
+from ultralytics.utils.loss import (
+    E2EDetectLoss,
+    v8ClassificationLoss,
+    v8DetectionLoss,
+    v8OBBLoss,
+    v8PoseLoss,
+    v8SegmentationLoss,
+)
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
    fuse_conv_and_bn,
@@ -192,6 +204,9 @@ class BaseModel(nn.Module):
                if isinstance(m, RepConv):
                    m.fuse_convs()
                    m.forward = m.forward_fuse  # update forward
+                if isinstance(m, RepVGGDW):
+                    m.fuse()
+                    m.forward = m.forward_fuse
            self.info(verbose=verbose)

        return self
@@ -294,6 +309,7 @@ class DetectionModel(BaseModel):
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
        self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
        self.inplace = self.yaml.get("inplace", True)
+        self.end2end = getattr(self.model[-1], "end2end", False)

        # Build strides
        m = self.model[-1]  # Detect()
@@ -303,6 +319,8 @@ class DetectionModel(BaseModel):

            def _forward(x):
                """Performs a forward pass through the model, handling different Detect subclass types accordingly."""
+                if self.end2end:
+                    return self.forward(x)["one2many"]
                return self.forward(x)[0] if isinstance(m, (Segment, Pose, OBB)) else self.forward(x)

            m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))])  # forward
@@ -355,7 +373,7 @@ class DetectionModel(BaseModel):

    def init_criterion(self):
        """Initialize the loss criterion for the DetectionModel."""
-        return v8DetectionLoss(self)
+        return E2EDetectLoss(self) if self.end2end else v8DetectionLoss(self)


 class OBBModel(DetectionModel):
@@ -689,8 +707,8 @@ def temporary_modules(modules={}, attributes={}):

    Example:
        ```python
-        with temporary_modules({'old.module.path': 'new.module.path'}, {'old.module.attribute': 'new.module.attribute'}):
-            import old.module.path  # this will now import new.module.path
+        with temporary_modules({'old.module': 'new.module'}, {'old.module.attribute': 'new.module.attribute'}):
+            import old.module  # this will now import new.module
            from old.module import attribute  # this will now import new.module.attribute
        ```

@@ -700,23 +718,19 @@ def temporary_modules(modules={}, attributes={}):
        applications or libraries. Use this function with caution.
    """

-    import importlib
    import sys
+    from importlib import import_module

    try:
        # Set attributes in sys.modules under their old name
        for old, new in attributes.items():
            old_module, old_attr = old.rsplit(".", 1)
            new_module, new_attr = new.rsplit(".", 1)
-            setattr(
-                importlib.import_module(old_module),
-                old_attr,
-                getattr(importlib.import_module(new_module), new_attr),
-            )
+            setattr(import_module(old_module), old_attr, getattr(import_module(new_module), new_attr))

        # Set modules in sys.modules under their old name
        for old, new in modules.items():
-            sys.modules[old] = importlib.import_module(new)
+            sys.modules[old] = import_module(new)

        yield
    finally:
@@ -750,9 +764,10 @@ def torch_safe_load(weight):
                "ultralytics.yolo.data": "ultralytics.data",
            },
            attributes={
-                "ultralytics.nn.modules.block.Silence": "torch.nn.Identity",
+                "ultralytics.nn.modules.block.Silence": "torch.nn.Identity",  # YOLOv9e
+                "ultralytics.nn.tasks.YOLOv10DetectionModel": "ultralytics.nn.tasks.DetectionModel",  # YOLOv10
            },
-        ):  # for legacy 8.0 Classify and Pose models
+        ):
            ckpt = torch.load(file, map_location="cpu")

    except ModuleNotFoundError as e:  # e.name is missing module name
@@ -911,6 +926,9 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            DWConvTranspose2d,
            C3x,
            RepC3,
+            PSA,
+            SCDown,
+            C2fCIB,
        }:
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
@@ -922,7 +940,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
                )  # num heads

            args = [c1, c2, *args[1:]]
-            if m in {BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3}:
+            if m in {BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3, C2fCIB}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is AIFI:
@@ -939,7 +957,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
-        elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn}:
+        elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
            args.append([ch[x] for x in f])
            if m is Segment:
                args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -1024,7 +1042,7 @@ def guess_model_task(model):
        m = cfg["head"][-1][-2].lower()  # output module name
        if m in {"classify", "classifier", "cls", "fc"}:
            return "classify"
-        if m == "detect":
+        if "detect" in m:
            return "detect"
        if m == "segment":
            return "segment"
@@ -1056,7 +1074,7 @@ def guess_model_task(model):
                return "pose"
            elif isinstance(m, OBB):
                return "obb"
-            elif isinstance(m, (Detect, WorldDetect)):
+            elif isinstance(m, (Detect, WorldDetect, v10Detect)):
                return "detect"

    # Guess from model filename