From 5306a8cc1f01ed79c8409b102df67b3cd90c717d Mon Sep 17 00:00:00 2001
From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com>
Date: Tue, 21 Jan 2025 18:41:05 +0800
Subject: [PATCH] Cleanup TorchVision related functions (#18790)

---
 ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml | 9 +--------
 ultralytics/nn/modules/block.py                    | 4 +---
 ultralytics/nn/modules/conv.py                     | 2 +-
 ultralytics/nn/tasks.py                            | 6 +++++-
 4 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
index baedcb5d..e2fbcfac 100644
--- a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
+++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
@@ -6,18 +6,11 @@
 
 # Parameters
 nc: 10 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolo11n-cls.yaml' will call yolo11-cls.yaml with scale 'n'
-  # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024]
-  s: [0.33, 0.50, 1024]
-  m: [0.67, 0.75, 1024]
-  l: [1.00, 1.00, 1024]
-  x: [1.00, 1.25, 1024]
 
 # ResNet18 backbone
 backbone:
   # [from, repeats, module, args]
-  - [-1, 1, TorchVision, [512, "resnet18", "DEFAULT", True, 2]] # truncate two layers from the end
+  - [-1, 1, TorchVision, [512, resnet18, DEFAULT, True, 2]] # truncate two layers from the end
 
 # YOLO11n head
 head:
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index 1edb9c80..beb03c20 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -1120,8 +1120,6 @@ class TorchVision(nn.Module):
         m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
 
     Args:
-        c1 (int): Input channels.
-        c2 (): Output channels.
         model (str): Name of the torchvision model to load.
         weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
         unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
@@ -1129,7 +1127,7 @@ class TorchVision(nn.Module):
         split (bool, optional): Returns output from intermediate child modules as list. Default is False.
     """
 
-    def __init__(self, c1, c2, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
+    def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
         """Load the model and weights from torchvision."""
         import torchvision  # scope for faster 'import ultralytics'
 
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index 51847078..6c15e1d6 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -336,7 +336,7 @@ class Concat(nn.Module):
 class Index(nn.Module):
     """Returns a particular index of the input."""
 
-    def __init__(self, c1, c2, index=0):
+    def __init__(self, index=0):
         """Returns a particular index of the input."""
         super().__init__()
         self.index = index
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 91a0ec57..a754f5e7 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -1060,12 +1060,16 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
                 m.legacy = legacy
         elif m is RTDETRDecoder:  # special case, channels arg must be passed in index 1
             args.insert(1, [ch[x] for x in f])
-        elif m in frozenset({CBLinear, TorchVision, Index}):
+        elif m is CBLinear:
             c2 = args[0]
             c1 = ch[f]
             args = [c1, c2, *args[1:]]
         elif m is CBFuse:
             c2 = ch[f[-1]]
+        elif m in frozenset({TorchVision, Index}):
+            c2 = args[0]
+            c1 = ch[f]
+            args = [*args[1:]]
         else:
             c2 = ch[f]