From 5306a8cc1f01ed79c8409b102df67b3cd90c717d Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Tue, 21 Jan 2025 18:41:05 +0800 Subject: [PATCH] Cleanup TorchVision related functions (#18790) --- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml | 9 +-------- ultralytics/nn/modules/block.py | 4 +--- ultralytics/nn/modules/conv.py | 2 +- ultralytics/nn/tasks.py | 6 +++++- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml index baedcb5d..e2fbcfac 100644 --- a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml @@ -6,18 +6,11 @@ # Parameters nc: 10 # number of classes -scales: # model compound scaling constants, i.e. 'model=yolo11n-cls.yaml' will call yolo11-cls.yaml with scale 'n' - # [depth, width, max_channels] - n: [0.33, 0.25, 1024] - s: [0.33, 0.50, 1024] - m: [0.67, 0.75, 1024] - l: [1.00, 1.00, 1024] - x: [1.00, 1.25, 1024] # ResNet18 backbone backbone: # [from, repeats, module, args] - - [-1, 1, TorchVision, [512, "resnet18", "DEFAULT", True, 2]] # truncate two layers from the end + - [-1, 1, TorchVision, [512, resnet18, DEFAULT, True, 2]] # truncate two layers from the end # YOLO11n head head: diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index 1edb9c80..beb03c20 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -1120,8 +1120,6 @@ class TorchVision(nn.Module): m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped. Args: - c1 (int): Input channels. - c2 (): Output channels. model (str): Name of the torchvision model to load. weights (str, optional): Pre-trained weights to load. Default is "DEFAULT". unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True. @@ -1129,7 +1127,7 @@ class TorchVision(nn.Module): split (bool, optional): Returns output from intermediate child modules as list. Default is False. """ - def __init__(self, c1, c2, model, weights="DEFAULT", unwrap=True, truncate=2, split=False): + def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False): """Load the model and weights from torchvision.""" import torchvision # scope for faster 'import ultralytics' diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py index 51847078..6c15e1d6 100644 --- a/ultralytics/nn/modules/conv.py +++ b/ultralytics/nn/modules/conv.py @@ -336,7 +336,7 @@ class Concat(nn.Module): class Index(nn.Module): """Returns a particular index of the input.""" - def __init__(self, c1, c2, index=0): + def __init__(self, index=0): """Returns a particular index of the input.""" super().__init__() self.index = index diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 91a0ec57..a754f5e7 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -1060,12 +1060,16 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) m.legacy = legacy elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1 args.insert(1, [ch[x] for x in f]) - elif m in frozenset({CBLinear, TorchVision, Index}): + elif m is CBLinear: c2 = args[0] c1 = ch[f] args = [c1, c2, *args[1:]] elif m is CBFuse: c2 = ch[f[-1]] + elif m in frozenset({TorchVision, Index}): + c2 = args[0] + c1 = ch[f] + args = [*args[1:]] else: c2 = ch[f]