diff --git a/docs/en/reference/nn/modules/block.md b/docs/en/reference/nn/modules/block.md
index da0ca655..05134721 100644
--- a/docs/en/reference/nn/modules/block.md
+++ b/docs/en/reference/nn/modules/block.md
@@ -189,4 +189,8 @@ keywords: Ultralytics, YOLO, neural networks, block modules, DFL, Proto, HGStem,
## ::: ultralytics.nn.modules.block.SCDown
+
+
+## ::: ultralytics.nn.modules.block.TorchVision
+
diff --git a/docs/en/reference/nn/modules/conv.md b/docs/en/reference/nn/modules/conv.md
index 6c68fa7f..e7a16b57 100644
--- a/docs/en/reference/nn/modules/conv.md
+++ b/docs/en/reference/nn/modules/conv.md
@@ -63,6 +63,10 @@ keywords: Ultralytics, convolution modules, Conv, LightConv, GhostConv, YOLO, de
+## ::: ultralytics.nn.modules.conv.Index
+
+
+
## ::: ultralytics.nn.modules.conv.autopad
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index eef82497..ec8851ed 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-__version__ = "8.3.58"
+__version__ = "8.3.59"
import os
diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
new file mode 100644
index 00000000..4604ed94
--- /dev/null
+++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
@@ -0,0 +1,21 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLO11-cls image classification model. For usage examples see https://docs.ultralytics.com/tasks/classify
+
+# Parameters
+nc: 10 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolo11n-cls.yaml' will call yolo11-cls.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.33, 0.25, 1024]
+ s: [0.33, 0.50, 1024]
+ m: [0.67, 0.75, 1024]
+ l: [1.00, 1.00, 1024]
+ x: [1.00, 1.25, 1024]
+
+# YOLO11n backbone
+backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, TorchVision, [512, "resnet18", "DEFAULT", True, 2]] # truncate two layers from the end
+
+# YOLO11n head
+head:
+ - [-1, 1, Classify, [nc]] # Classify
diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py
index a840c5a7..147a0a55 100644
--- a/ultralytics/nn/modules/__init__.py
+++ b/ultralytics/nn/modules/__init__.py
@@ -56,6 +56,7 @@ from .block import (
RepVGGDW,
ResNetLayer,
SCDown,
+ TorchVision,
)
from .conv import (
CBAM,
@@ -68,6 +69,7 @@ from .conv import (
DWConvTranspose2d,
Focus,
GhostConv,
+ Index,
LightConv,
RepConv,
SpatialAttention,
@@ -156,4 +158,6 @@ __all__ = (
"C2fCIB",
"Attention",
"PSA",
+ "TorchVision",
+ "Index",
)
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index ddb21a74..6ea6ef6e 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -49,6 +49,7 @@ __all__ = (
"Attention",
"PSA",
"SCDown",
+ "TorchVision",
)
@@ -1107,3 +1108,51 @@ class SCDown(nn.Module):
def forward(self, x):
"""Applies convolution and downsampling to the input tensor in the SCDown module."""
return self.cv2(self.cv1(x))
+
+
+class TorchVision(nn.Module):
+ """
+ TorchVision module to allow loading any torchvision model.
+
+ This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers.
+
+ Attributes:
+ m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
+
+ Args:
+ c1 (int): Input channels.
+        c2 (int): Output channels.
+ model (str): Name of the torchvision model to load.
+ weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
+ unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
+ truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2.
+ split (bool, optional): Returns output from intermediate child modules as list. Default is False.
+ """
+
+ def __init__(self, c1, c2, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
+ """Load the model and weights from torchvision."""
+ import torchvision
+
+ super().__init__()
+ if hasattr(torchvision.models, "get_model"):
+ self.m = torchvision.models.get_model(model, weights=weights)
+ else:
+ self.m = torchvision.models.__dict__[model](pretrained=bool(weights))
+ if unwrap:
+ layers = list(self.m.children())[:-truncate]
+ if isinstance(layers[0], nn.Sequential): # Second-level for some models like EfficientNet, Swin
+ layers = [*list(layers[0].children()), *layers[1:]]
+ self.m = nn.Sequential(*layers)
+ self.split = split
+ else:
+ self.split = False
+ self.m.head = self.m.heads = nn.Identity()
+
+ def forward(self, x):
+ """Forward pass through the model."""
+ if self.split:
+ y = [x]
+ y.extend(m(y[-1]) for m in self.m)
+ else:
+ y = self.m(x)
+ return y
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index 94c5b6c6..96387ad8 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -21,6 +21,7 @@ __all__ = (
"CBAM",
"Concat",
"RepConv",
+ "Index",
)
@@ -330,3 +331,20 @@ class Concat(nn.Module):
def forward(self, x):
"""Forward pass for the YOLOv8 mask Proto module."""
return torch.cat(x, self.d)
+
+
+class Index(nn.Module):
+ """Returns a particular index of the input."""
+
+ def __init__(self, c1, c2, index=0):
+ """Returns a particular index of the input."""
+ super().__init__()
+ self.index = index
+
+ def forward(self, x):
+ """
+ Forward pass.
+
+ Expects a list of tensors as input.
+ """
+ return x[self.index]
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index c1a24c34..43047432 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -50,6 +50,7 @@ from ultralytics.nn.modules import (
HGBlock,
HGStem,
ImagePoolingAttn,
+ Index,
Pose,
RepC3,
RepConv,
@@ -59,6 +60,7 @@ from ultralytics.nn.modules import (
RTDETRDecoder,
SCDown,
Segment,
+ TorchVision,
WorldDetect,
v10Detect,
)
@@ -1052,7 +1054,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
m.legacy = legacy
elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1
args.insert(1, [ch[x] for x in f])
- elif m is CBLinear:
+ elif m in {CBLinear, TorchVision, Index}:
c2 = args[0]
c1 = ch[f]
args = [c1, c2, *args[1:]]