Add docformatter to pre-commit (#5279)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Glenn Jocher 2023-10-09 02:25:22 +02:00 committed by GitHub
parent c7aa83da31
commit 7517667a33
90 changed files with 1396 additions and 497 deletions


@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
def check_class_names(names):
"""Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
"""
Check class names.
Map imagenet class codes to human-readable names if required. Convert lists to dicts.
"""
if isinstance(names, list): # names is a list
names = dict(enumerate(names)) # convert to dict
if isinstance(names, dict):
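A quick illustration of the list-to-dict conversion described in the docstring above (a sketch; assumes plain human-readable names, so no ImageNet code remapping is triggered):

```python
from ultralytics.nn.autobackend import check_class_names

names = check_class_names(['person', 'bicycle', 'car'])
# -> {0: 'person', 1: 'bicycle', 2: 'car'}
```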
@ -37,6 +41,32 @@ def check_class_names(names):
class AutoBackend(nn.Module):
"""
Handles dynamic backend selection for running inference using Ultralytics YOLO models.
The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
range of formats, each with specific naming conventions as outlined below:
Supported Formats and Naming Conventions:
| Format | File Suffix |
|-----------------------|------------------|
| PyTorch | *.pt |
| TorchScript | *.torchscript |
| ONNX Runtime | *.onnx |
| ONNX OpenCV DNN | *.onnx (dnn=True)|
| OpenVINO | *openvino_model/ |
| CoreML | *.mlpackage |
| TensorRT | *.engine |
| TensorFlow SavedModel | *_saved_model |
| TensorFlow GraphDef | *.pb |
| TensorFlow Lite | *.tflite |
| TensorFlow Edge TPU | *_edgetpu.tflite |
| PaddlePaddle | *_paddle_model |
| ncnn | *_ncnn_model |
This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
models across various platforms.
"""
@torch.no_grad()
def __init__(self,
@ -48,33 +78,16 @@ class AutoBackend(nn.Module):
fuse=True,
verbose=True):
"""
MultiBackend class for python inference on various platforms using Ultralytics YOLO.
Initialize the AutoBackend for inference.
Args:
weights (str): The path to the weights file. Default: 'yolov8n.pt'
device (torch.device): The device to run the model on.
dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
data (str | Path | optional): Additional data.yaml file for class names.
fp16 (bool): If True, use half precision. Default: False
fuse (bool): Whether to fuse the model or not. Default: True
verbose (bool): Whether to run in verbose mode or not. Default: True
Supported formats and their naming conventions:
| Format | Suffix |
|-----------------------|------------------|
| PyTorch | *.pt |
| TorchScript | *.torchscript |
| ONNX Runtime | *.onnx |
| ONNX OpenCV DNN | *.onnx dnn=True |
| OpenVINO | *.xml |
| CoreML | *.mlpackage |
| TensorRT | *.engine |
| TensorFlow SavedModel | *_saved_model |
| TensorFlow GraphDef | *.pb |
| TensorFlow Lite | *.tflite |
| TensorFlow Edge TPU | *_edgetpu.tflite |
| PaddlePaddle | *_paddle_model |
| ncnn | *_ncnn_model |
weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
device (torch.device): Device to run the model on. Defaults to CPU.
dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
verbose (bool): Enable verbose logging. Defaults to True.
"""
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
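For context, a minimal usage sketch of the initializer documented above (assumes a local 'yolov8n.pt' weights file and CPU inference):

```python
import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend('yolov8n.pt', device=torch.device('cpu'), fp16=False, fuse=True, verbose=False)
model.warmup(imgsz=(1, 3, 640, 640))  # optional dummy forward pass to warm up
```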
@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
def from_numpy(self, x):
"""
Convert a numpy array to a tensor.
Convert a numpy array to a tensor.
Args:
x (np.ndarray): The array to be converted.
Args:
x (np.ndarray): The array to be converted.
Returns:
(torch.Tensor): The converted tensor
"""
Returns:
(torch.Tensor): The converted tensor
"""
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
@staticmethod
def _model_type(p='path/to/model.pt'):
"""
This function takes a path to a model file and returns the model type
This function takes a path to a model file and returns the model type.
Args:
p: path to the model file. Defaults to path/to/model.pt
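As an aside, a hypothetical call illustrating the suffix-based dispatch (the boolean-per-format return shape is an assumption inferred from the supported-formats table above):

```python
from ultralytics.nn.autobackend import AutoBackend

# One boolean per supported format, True where the suffix matches (assumed)
types = AutoBackend._model_type('yolov8n.onnx')  # e.g. [False, False, True, ...]
```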


@ -1,16 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics modules. Visualize with:
Ultralytics modules.
from ultralytics.nn.modules import *
import torch
import os
Example:
Visualize a module with Netron.
```python
from ultralytics.nn.modules import *
import torch
import os
x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
torch.onnx.export(m, x, f)
os.system(f'onnxsim {f} {f} && open {f}')
x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
torch.onnx.export(m, x, f)
os.system(f'onnxsim {f} {f} && open {f}')
```
"""
from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,


@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Block modules
"""
"""Block modules."""
import torch
import torch.nn as nn
@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
class DFL(nn.Module):
"""
Integral module of Distribution Focal Loss (DFL).
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
"""
@ -51,11 +50,14 @@ class Proto(nn.Module):
class HGStem(nn.Module):
"""StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
"""
StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2):
"""Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
super().__init__()
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@ -79,11 +81,14 @@ class HGStem(nn.Module):
class HGBlock(nn.Module):
"""HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
"""
HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
"""Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
super().__init__()
block = LightConv if lightconv else Conv
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@ -218,6 +223,7 @@ class RepC3(nn.Module):
"""Rep C3."""
def __init__(self, c1, c2, n=3, e=1.0):
"""Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c2, 1, 1)


@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Convolution modules
"""
"""Convolution modules."""
import math
@ -69,7 +67,9 @@ class Conv2(Conv):
class LightConv(nn.Module):
"""Light convolution with args(ch_in, ch_out, kernel).
"""
Light convolution with args(ch_in, ch_out, kernel).
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
@ -148,12 +148,15 @@ class GhostConv(nn.Module):
class RepConv(nn.Module):
"""
RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
RepConv is a basic rep-style block, including training and deploy status.
This module is used in RT-DETR.
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
"""
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
"""Initializes Light Convolution layer with inputs, outputs & optional activation function."""
super().__init__()
assert k == 3 and p == 1
self.g = g
@ -166,27 +169,30 @@ class RepConv(nn.Module):
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
def forward_fuse(self, x):
"""Forward process"""
"""Forward process."""
return self.act(self.conv(x))
def forward(self, x):
"""Forward process"""
"""Forward process."""
id_out = 0 if self.bn is None else self.bn(x)
return self.act(self.conv1(x) + self.conv2(x) + id_out)
def get_equivalent_kernel_bias(self):
"""Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
kernelid, biasid = self._fuse_bn_tensor(self.bn)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
"""Pads a 1x1 tensor to a 3x3 tensor."""
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
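The padding step above is just a zero border placed around the 1x1 weight; for example (shapes hypothetical):

```python
import torch
import torch.nn.functional as F

k1 = torch.randn(8, 8, 1, 1)  # 1x1 kernel: (out_ch, in_ch, 1, 1)
k3 = F.pad(k1, [1, 1, 1, 1])  # (8, 8, 3, 3); original weight sits at the center
```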
def _fuse_bn_tensor(self, branch):
"""Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
if branch is None:
return 0, 0
if isinstance(branch, Conv):
@ -214,6 +220,7 @@ class RepConv(nn.Module):
return kernel * t, beta - running_mean * gamma / std
def fuse_convs(self):
"""Combines two convolution layers into a single layer and removes unused attributes from the class."""
if hasattr(self, 'conv'):
return
kernel, bias = self.get_equivalent_kernel_bias()
@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
def __init__(self, channels: int) -> None:
"""Initializes the class and sets the basic configurations and instance variables required."""
super().__init__()
self.pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
self.act = nn.Sigmoid()
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
return x * self.act(self.fc(self.pool(x)))
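Shape-wise the module is a drop-in channel rescaling; a quick sketch assuming the ChannelAttention class above:

```python
import torch

ca = ChannelAttention(64)
y = ca(torch.randn(1, 64, 32, 32))  # output keeps the input shape, channels rescaled
```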


@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model head modules
"""
"""Model head modules."""
import math
@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
self._reset_parameters()
def forward(self, x, batch=None):
"""Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
# input projection and embedding
@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
return y if self.export else (y, x)
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
"""Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
anchors = []
for i, (h, w) in enumerate(shapes):
sy = torch.arange(end=h, dtype=dtype, device=device)
@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
return anchors, valid_mask
def _get_encoder_input(self, x):
"""Processes and returns encoder inputs by getting projection features from input and concatenating them."""
# get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
# get encoder inputs
@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
return feats, shapes
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
"""Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
# prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
"""Initializes or resets the parameters of the model's various components with predefined weights and biases."""
# class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.


@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Transformer modules
"""
"""Transformer modules."""
import math
@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M
class TransformerEncoderLayer(nn.Module):
"""Transformer Encoder."""
"""Defines a single layer of the transformer encoder."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
"""Initialize the TransformerEncoderLayer with specified parameters."""
super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
self.normalize_before = normalize_before
def with_pos_embed(self, tensor, pos=None):
"""Add position embeddings if given."""
"""Add position embeddings to the tensor if provided."""
return tensor if pos is None else tensor + pos
def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
"""Performs forward pass with post-normalization."""
q = k = self.with_pos_embed(src, pos)
src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
src = src + self.dropout1(src2)
@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
return self.norm2(src)
def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
"""Performs forward pass with pre-normalization."""
src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos)
src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):
class AIFI(TransformerEncoderLayer):
"""Defines the AIFI transformer layer."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
"""Initialize the AIFI instance with specified parameters."""
super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
def forward(self, x):
"""Forward pass for the AIFI transformer layer."""
c, h, w = x.shape[1:]
pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
# flatten [B, C, H, W] to [B, HxW, C]
@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@staticmethod
def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
"""Builds 2D sine-cosine position embedding."""
grid_w = torch.arange(int(w), dtype=torch.float32)
grid_h = torch.arange(int(h), dtype=torch.float32)
grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):
class MLPBlock(nn.Module):
"""Implements a single block of a multi-layer perceptron."""
def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
"""Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
super().__init__()
self.lin1 = nn.Linear(embedding_dim, mlp_dim)
self.lin2 = nn.Linear(mlp_dim, embedding_dim)
self.act = act()
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Forward pass for the MLPBlock."""
return self.lin2(self.act(self.lin1(x)))
class MLP(nn.Module):
""" Very simple multi-layer perceptron (also called FFN)"""
"""Implements a simple multi-layer perceptron (also called FFN)."""
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
"""Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
def forward(self, x):
"""Forward pass for the entire MLP."""
for i, layer in enumerate(self.layers):
x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
return x
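A usage sketch for the MLP above (dimensions hypothetical):

```python
import torch

mlp = MLP(input_dim=256, hidden_dim=512, output_dim=4, num_layers=3)
out = mlp(torch.randn(2, 10, 256))  # -> shape (2, 10, 4)
```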
@ -168,17 +178,22 @@ class MLP(nn.Module):
class LayerNorm2d(nn.Module):
"""
LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
Original implementation at
https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
"""
def __init__(self, num_channels, eps=1e-6):
"""Initialize LayerNorm2d with the given parameters."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x):
"""Perform forward pass for 2D layer normalization."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):
class MSDeformAttn(nn.Module):
"""
Original Multi-Scale Deformable Attention Module.
Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
"""
def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
"""Initialize MSDeformAttn with the given parameters."""
super().__init__()
if d_model % n_heads != 0:
raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
self._reset_parameters()
def _reset_parameters(self):
"""Reset module parameters."""
constant_(self.sampling_offsets.weight.data, 0.)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):
def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
"""
Perform forward pass for multi-scale deformable attention.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
Args:
query (torch.Tensor): [bs, query_length, C]
refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):
class DeformableTransformerDecoderLayer(nn.Module):
"""
Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
"""
def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
"""Initialize the DeformableTransformerDecoderLayer with the given parameters."""
super().__init__()
# self attention
# Self attention
self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
# cross attention
# Cross attention
self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
self.dropout2 = nn.Dropout(dropout)
self.norm2 = nn.LayerNorm(d_model)
# ffn
# FFN
self.linear1 = nn.Linear(d_model, d_ffn)
self.act = act
self.dropout3 = nn.Dropout(dropout)
@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):
@staticmethod
def with_pos_embed(tensor, pos):
"""Add positional embeddings to the input tensor, if provided."""
return tensor if pos is None else tensor + pos
def forward_ffn(self, tgt):
"""Perform forward pass through the Feed-Forward Network part of the layer."""
tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
tgt = tgt + self.dropout4(tgt2)
return self.norm3(tgt)
def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
# self attention
"""Perform the forward pass through the entire decoder layer."""
# Self attention
q = k = self.with_pos_embed(embed, query_pos)
tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
attn_mask=attn_mask)[0].transpose(0, 1)
embed = embed + self.dropout1(tgt)
embed = self.norm1(embed)
# cross attention
# Cross attention
tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
padding_mask)
embed = embed + self.dropout2(tgt)
embed = self.norm2(embed)
# ffn
# FFN
return self.forward_ffn(embed)
class DeformableTransformerDecoder(nn.Module):
"""
Implementation of Deformable Transformer Decoder based on PaddleDetection.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
"""
def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
"""Initialize the DeformableTransformerDecoder with the given parameters."""
super().__init__()
self.layers = _get_clones(decoder_layer, num_layers)
self.num_layers = num_layers
@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
pos_mlp,
attn_mask=None,
padding_mask=None):
"""Perform the forward pass through the entire decoder."""
output = embed
dec_bboxes = []
dec_cls = []


@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Module utils
"""
"""Module utils."""
import copy
import math
@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'
def _get_clones(module, n):
"""Create a list of cloned modules from the given module."""
return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])
def bias_init_with_prob(prior_prob=0.01):
"""initialize conv/fc bias value according to a given probability value."""
"""Initialize conv/fc bias value according to a given probability value."""
return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init
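For example, the default prior of 0.01 gives a bias of about -4.6, so a sigmoid-activated output starts near the prior:

```python
import numpy as np

b = -np.log((1 - 0.01) / 0.01)  # ≈ -4.595
# sigmoid(-4.595) ≈ 0.01, matching the requested prior probability
```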
def linear_init_(module):
"""Initialize the weights and biases of a linear module."""
bound = 1 / math.sqrt(module.weight.shape[0])
uniform_(module.weight, -bound, bound)
if hasattr(module, 'bias') and module.bias is not None:
@ -32,6 +32,7 @@ def linear_init_(module):
def inverse_sigmoid(x, eps=1e-5):
"""Calculate the inverse sigmoid function for a tensor."""
x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps)
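The clipped logit presumably returned from these clamps is log(x1 / x2); a round-trip check of that form (a sketch, not the function body):

```python
import torch

x = torch.tensor([0.1, 0.5, 0.9])
y = torch.log(x.clamp(min=1e-5) / (1 - x).clamp(min=1e-5))
assert torch.allclose(torch.sigmoid(y), x, atol=1e-4)  # sigmoid inverts the logit
```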
@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
attention_weights: torch.Tensor) -> torch.Tensor:
"""
Multi-scale deformable attention.
https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
"""


@ -25,14 +25,11 @@ except ImportError:
class BaseModel(nn.Module):
"""
The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
"""
"""The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""
def forward(self, x, *args, **kwargs):
"""
Forward pass of the model on a single scale.
Wrapper for `_forward_once` method.
Forward pass of the model on a single scale. Wrapper for `_forward_once` method.
Args:
x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@ -93,8 +90,8 @@ class BaseModel(nn.Module):
def _profile_one_layer(self, m, x, dt):
"""
Profile the computation time and FLOPs of a single layer of the model on a given input.
Appends the results to the provided list.
Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
the provided list.
Args:
m (nn.Module): The layer to be profiled.
@ -158,7 +155,7 @@ class BaseModel(nn.Module):
def info(self, detailed=False, verbose=True, imgsz=640):
"""
Prints model information
Prints model information.
Args:
detailed (bool): if True, prints out detailed information about the model. Defaults to False
@ -175,7 +172,7 @@ class BaseModel(nn.Module):
fn (function): the function to apply to the model
Returns:
A model that is a Detect() object.
(BaseModel): An updated BaseModel object.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
@ -202,7 +199,7 @@ class BaseModel(nn.Module):
def loss(self, batch, preds=None):
"""
Compute loss
Compute loss.
Args:
batch (dict): Batch to compute loss on
@ -215,6 +212,7 @@ class BaseModel(nn.Module):
return self.criterion(preds, batch)
def init_criterion(self):
"""Initialize the loss criterion for the BaseModel."""
raise NotImplementedError('compute_loss() needs to be implemented by task heads')
@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
"""YOLOv8 detection model."""
def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
"""Initialize the YOLOv8 detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
return y
def init_criterion(self):
"""Initialize the loss criterion for the DetectionModel."""
return v8DetectionLoss(self)
@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
"""Initialize the loss criterion for the SegmentationModel."""
return v8SegmentationLoss(self)
@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
"""Initialize the loss criterion for the PoseModel."""
return v8PoseLoss(self)
@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
def init_criterion(self):
"""Compute the classification loss between predictions and true labels."""
"""Initialize the loss criterion for the ClassificationModel."""
return v8ClassificationLoss()
class RTDETRDetectionModel(DetectionModel):
"""
RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
This class is responsible for constructing the RTDETR architecture, defining loss functions, and
facilitating both the training and inference processes. RTDETR is an object detection and tracking model
that extends from the DetectionModel base class.
Attributes:
cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
ch (int): Number of input channels. Default is 3 (RGB).
nc (int, optional): Number of classes for object detection. Default is None.
verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
Methods:
init_criterion: Initializes the criterion used for loss calculation.
loss: Computes and returns the loss during training.
predict: Performs a forward pass through the network and returns the output.
"""
def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
"""
Initialize the RTDETRDetectionModel.
Args:
cfg (str): Configuration file name or path.
ch (int): Number of input channels.
nc (int, optional): Number of classes. Defaults to None.
verbose (bool, optional): Print additional information during initialization. Defaults to True.
"""
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
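A construction sketch for the initializer above (assumes the bundled 'rtdetr-l.yaml' config is resolvable):

```python
from ultralytics.nn.tasks import RTDETRDetectionModel

model = RTDETRDetectionModel(cfg='rtdetr-l.yaml', ch=3, nc=80, verbose=False)
```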
def init_criterion(self):
"""Compute the classification loss between predictions and true labels."""
"""Initialize the loss criterion for the RTDETRDetectionModel."""
from ultralytics.models.utils.loss import RTDETRDetectionLoss
return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)
def loss(self, batch, preds=None):
"""
Compute the loss for the given batch of data.
Args:
batch (dict): Dictionary containing image and label data.
preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
Returns:
tuple: A tuple containing the total loss and the three main losses in a tensor.
"""
if not hasattr(self, 'criterion'):
self.criterion = self.init_criterion()
@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):
def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
"""
Perform a forward pass through the network.
Perform a forward pass through the model.
Args:
x (torch.Tensor): The input tensor to the model
profile (bool): Print the computation time of each layer if True, defaults to False.
visualize (bool): Save the feature maps of the model if True, defaults to False
batch (dict): A dict including gt boxes and labels from dataloader.
x (torch.Tensor): The input tensor.
profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
batch (dict, optional): Ground truth data for evaluation. Defaults to None.
augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
Returns:
(torch.Tensor): The last output of the model.
torch.Tensor: Model's output tensor.
"""
y, dt = [], [] # outputs
for m in self.model[:-1]: # except the head part
@ -708,9 +748,9 @@ def yaml_model_load(path):
def guess_model_scale(model_path):
"""
Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
n, s, m, l, or x. The function returns the size character of the model scale as a string.
Args:
model_path (str | Path): The path to the YOLO model's YAML file.