From cc1e77138c2a124563b898d8e267a3045fa7bedf Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Thu, 9 Jan 2025 20:57:46 +0800 Subject: [PATCH] `ultralytics 8.3.59` Add ability to load any `torchvision` model as module (#18564) Signed-off-by: Glenn Jocher Co-authored-by: UltralyticsAssistant Co-authored-by: Glenn Jocher --- docs/en/reference/nn/modules/block.md | 4 ++ docs/en/reference/nn/modules/conv.md | 4 ++ ultralytics/__init__.py | 2 +- .../cfg/models/11/yolo11-cls-resnet18.yaml | 21 ++++++++ ultralytics/nn/modules/__init__.py | 4 ++ ultralytics/nn/modules/block.py | 49 +++++++++++++++++++ ultralytics/nn/modules/conv.py | 18 +++++++ ultralytics/nn/tasks.py | 4 +- 8 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml diff --git a/docs/en/reference/nn/modules/block.md b/docs/en/reference/nn/modules/block.md index da0ca655..05134721 100644 --- a/docs/en/reference/nn/modules/block.md +++ b/docs/en/reference/nn/modules/block.md @@ -189,4 +189,8 @@ keywords: Ultralytics, YOLO, neural networks, block modules, DFL, Proto, HGStem, ## ::: ultralytics.nn.modules.block.SCDown +



+ +## ::: ultralytics.nn.modules.block.TorchVision +

diff --git a/docs/en/reference/nn/modules/conv.md b/docs/en/reference/nn/modules/conv.md index 6c68fa7f..e7a16b57 100644 --- a/docs/en/reference/nn/modules/conv.md +++ b/docs/en/reference/nn/modules/conv.md @@ -63,6 +63,10 @@ keywords: Ultralytics, convolution modules, Conv, LightConv, GhostConv, YOLO, de



+## ::: ultralytics.nn.modules.conv.Index + +



+ ## ::: ultralytics.nn.modules.conv.autopad

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index eef82497..ec8851ed 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.3.58" +__version__ = "8.3.59" import os diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml new file mode 100644 index 00000000..4604ed94 --- /dev/null +++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml @@ -0,0 +1,21 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLO11-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify + +# Parameters +nc: 10 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolo11n-cls.yaml' will call yolo11-cls.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 1024] + l: [1.00, 1.00, 1024] + x: [1.00, 1.25, 1024] + +# YOLO11n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, TorchVision, [512, "resnet18", "DEFAULT", True, 2]] # truncate two layers from the end + +# YOLO11n head +head: + - [-1, 1, Classify, [nc]] # Classify diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py index a840c5a7..147a0a55 100644 --- a/ultralytics/nn/modules/__init__.py +++ b/ultralytics/nn/modules/__init__.py @@ -56,6 +56,7 @@ from .block import ( RepVGGDW, ResNetLayer, SCDown, + TorchVision, ) from .conv import ( CBAM, @@ -68,6 +69,7 @@ from .conv import ( DWConvTranspose2d, Focus, GhostConv, + Index, LightConv, RepConv, SpatialAttention, @@ -156,4 +158,6 @@ __all__ = ( "C2fCIB", "Attention", "PSA", + "TorchVision", + "Index", ) diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index ddb21a74..6ea6ef6e 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -49,6 +49,7 @@ __all__ = ( "Attention", "PSA", "SCDown", + 
"TorchVision", ) @@ -1107,3 +1108,51 @@ class SCDown(nn.Module): def forward(self, x): """Applies convolution and downsampling to the input tensor in the SCDown module.""" return self.cv2(self.cv1(x)) + + +class TorchVision(nn.Module): + """ + TorchVision module to allow loading any torchvision model. + + This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers. + + Attributes: + m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped. + + Args: + c1 (int): Input channels. + c2 (int): Output channels. + model (str): Name of the torchvision model to load. + weights (str, optional): Pre-trained weights to load. Default is "DEFAULT". + unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True. + truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2. + split (bool, optional): Returns output from intermediate child modules as list. Default is False. 
+ """ + + def __init__(self, c1, c2, model, weights="DEFAULT", unwrap=True, truncate=2, split=False): + """Load the model and weights from torchvision.""" + import torchvision + + super().__init__() + if hasattr(torchvision.models, "get_model"): + self.m = torchvision.models.get_model(model, weights=weights) + else: + self.m = torchvision.models.__dict__[model](pretrained=bool(weights)) + if unwrap: + layers = list(self.m.children())[:-truncate] + if isinstance(layers[0], nn.Sequential): # Second-level for some models like EfficientNet, Swin + layers = [*list(layers[0].children()), *layers[1:]] + self.m = nn.Sequential(*layers) + self.split = split + else: + self.split = False + self.m.head = self.m.heads = nn.Identity() + + def forward(self, x): + """Forward pass through the model.""" + if self.split: + y = [x] + y.extend(m(y[-1]) for m in self.m) + else: + y = self.m(x) + return y diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py index 94c5b6c6..96387ad8 100644 --- a/ultralytics/nn/modules/conv.py +++ b/ultralytics/nn/modules/conv.py @@ -21,6 +21,7 @@ __all__ = ( "CBAM", "Concat", "RepConv", + "Index", ) @@ -330,3 +331,20 @@ class Concat(nn.Module): def forward(self, x): """Forward pass for the YOLOv8 mask Proto module.""" return torch.cat(x, self.d) + + +class Index(nn.Module): + """Returns a particular index of the input.""" + + def __init__(self, c1, c2, index=0): + """Initializes the Index module by storing the index to select from the input.""" + super().__init__() + self.index = index + + def forward(self, x): + """ + Forward pass. + + Expects a list of tensors as input. 
+ """ + return x[self.index] diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index c1a24c34..43047432 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -50,6 +50,7 @@ from ultralytics.nn.modules import ( HGBlock, HGStem, ImagePoolingAttn, + Index, Pose, RepC3, RepConv, @@ -59,6 +60,7 @@ from ultralytics.nn.modules import ( RTDETRDecoder, SCDown, Segment, + TorchVision, WorldDetect, v10Detect, ) @@ -1052,7 +1054,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) m.legacy = legacy elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1 args.insert(1, [ch[x] for x in f]) - elif m is CBLinear: + elif m in {CBLinear, TorchVision, Index}: c2 = args[0] c1 = ch[f] args = [c1, c2, *args[1:]]