diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e9766c42..5d34e3f8 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -48,7 +48,8 @@ jobs: python-version: "3.x" - uses: astral-sh/setup-uv@v5 - name: Install Dependencies - run: uv pip install --system ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin + # Note "beautifulsoup4<=4.12.3" required due to errors with >=4.13 in https://github.com/ultralytics/ultralytics/pull/19067 + run: uv pip install --system "beautifulsoup4<=4.12.3" ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin - name: Ruff fixes continue-on-error: true run: ruff check --fix --unsafe-fixes --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 . diff --git a/docs/build_docs.py b/docs/build_docs.py index 021989d2..68a333b8 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -113,7 +113,7 @@ def update_subdir_edit_links(subdir="", docs_url=""): if str(subdir[0]) == "/": subdir = str(subdir[0])[1:] html_files = (SITE / subdir).rglob("*.html") - for html_file in tqdm(html_files, desc="Processing subdir files"): + for html_file in tqdm(html_files, desc="Processing subdir files", mininterval=1.0): with html_file.open("r", encoding="utf-8") as file: soup = BeautifulSoup(file, "html.parser") @@ -178,7 +178,7 @@ def update_docs_html(): # Convert plaintext links to HTML hyperlinks files_modified = 0 - for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links"): + for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links", mininterval=1.0): with open(html_file, encoding="utf-8") as file: content = file.read() updated_content = convert_plaintext_links_to_html(content) @@ -294,7 +294,7 @@ def minify_files(html=True, css=True, js=True): }.items(): stats[ext] = {"original": 0, "minified": 0} directory = "" # 
"stylesheets" if ext == css else "javascript" if ext == "js" else "" - for f in tqdm((SITE / directory).rglob(f"*.{ext}"), desc=f"Minifying {ext.upper()}"): + for f in tqdm((SITE / directory).rglob(f"*.{ext}"), desc=f"Minifying {ext.upper()}", mininterval=1.0): content = f.read_text(encoding="utf-8") minified = minifier(content) if minifier else remove_comments_and_empty_lines(content, ext) stats[ext]["original"] += len(content) diff --git a/pyproject.toml b/pyproject.toml index 58e9c448..6ab128e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,7 @@ dev = [ "pytest-cov", "coverage[toml]", "mkdocs>=1.6.0", + "beautifulsoup4<=4.12.3", # For docs https://github.com/ultralytics/ultralytics/pull/19067 "mkdocs-material>=9.5.9", "mkdocstrings[python]", "mkdocs-redirects", # 301 redirects diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 0bf12781..01631ca8 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license -__version__ = "8.3.70" +__version__ = "8.3.71" import os diff --git a/ultralytics/cfg/models/v6/yolov6.yaml b/ultralytics/cfg/models/v6/yolov6.yaml index 0812ac7e..4a45224e 100644 --- a/ultralytics/cfg/models/v6/yolov6.yaml +++ b/ultralytics/cfg/models/v6/yolov6.yaml @@ -6,7 +6,7 @@ # Parameters nc: 80 # number of classes -activation: nn.ReLU() # (optional) model default activation function +activation: torch.nn.ReLU() # (optional) model default activation function scales: # model compound scaling constants, i.e. 
'model=yolov6n.yaml' will call yolov8.yaml with scale 'n' # [depth, width, max_channels] n: [0.33, 0.25, 1024] diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 7cf91b86..e4153837 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -11,7 +11,7 @@ from PIL import Image from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir from ultralytics.engine.results import Results from ultralytics.hub import HUB_WEB_ROOT, HUBTrainingSession -from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load +from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, yaml_model_load from ultralytics.utils import ( ARGV, ASSETS, @@ -26,7 +26,7 @@ from ultralytics.utils import ( ) -class Model(nn.Module): +class Model(torch.nn.Module): """ A base class for implementing YOLO models, unifying APIs across different model types. @@ -37,7 +37,7 @@ class Model(nn.Module): Attributes: callbacks (Dict): A dictionary of callback functions for various events during model operations. predictor (BasePredictor): The predictor object used for making predictions. - model (nn.Module): The underlying PyTorch model. + model (torch.nn.Module): The underlying PyTorch model. trainer (BaseTrainer): The trainer object used for training the model. ckpt (Dict): The checkpoint data if the model is loaded from a *.pt file. cfg (str): The configuration of the model if loaded from a *.yaml file. @@ -317,7 +317,7 @@ class Model(nn.Module): >>> model._check_is_pytorch_model() # Raises TypeError """ pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt" - pt_module = isinstance(self.model, nn.Module) + pt_module = isinstance(self.model, torch.nn.Module) if not (pt_module or pt_str): raise TypeError( f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. 
" @@ -405,7 +405,7 @@ class Model(nn.Module): from ultralytics import __version__ updates = { - "model": deepcopy(self.model).half() if isinstance(self.model, nn.Module) else self.model, + "model": deepcopy(self.model).half() if isinstance(self.model, torch.nn.Module) else self.model, "date": datetime.now().isoformat(), "version": __version__, "license": "AGPL-3.0 License (https://ultralytics.com/license)", @@ -452,7 +452,7 @@ class Model(nn.Module): performs both convolution and normalization in one step. Raises: - TypeError: If the model is not a PyTorch nn.Module. + TypeError: If the model is not a PyTorch torch.nn.Module. Examples: >>> model = Model("yolo11n.pt") @@ -921,13 +921,13 @@ class Model(nn.Module): Retrieves the device on which the model's parameters are allocated. This property determines the device (CPU or GPU) where the model's parameters are currently stored. It is - applicable only to models that are instances of nn.Module. + applicable only to models that are instances of torch.nn.Module. Returns: (torch.device): The device (CPU/GPU) of the model. Raises: - AttributeError: If the model is not a PyTorch nn.Module instance. + AttributeError: If the model is not a torch.nn.Module instance. Examples: >>> model = YOLO("yolo11n.pt") @@ -937,7 +937,7 @@ class Model(nn.Module): >>> print(model.device) device(type='cpu') """ - return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None + return next(self.model.parameters()).device if isinstance(self.model, torch.nn.Module) else None @property def transforms(self): diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py index 420a4c3b..8f5c5b77 100644 --- a/ultralytics/models/sam/modules/sam.py +++ b/ultralytics/models/sam/modules/sam.py @@ -426,8 +426,7 @@ class SAM2Model(torch.nn.Module): high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask. 
obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask. object_score_logits: Tensor of shape (B) with object score logits. - - Where M is 3 if multimask_output=True, and 1 if multimask_output=False. + Where M is 3 if multimask_output=True, and 1 if multimask_output=False. Examples: >>> backbone_features = torch.rand(1, 256, 32, 32) diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py index 9fc872f9..1a32350c 100644 --- a/ultralytics/models/yolo/pose/val.py +++ b/ultralytics/models/yolo/pose/val.py @@ -158,7 +158,7 @@ class PoseValidator(DetectionValidator): gt_kpts (torch.Tensor | None): Optional tensor with shape (N, 51) representing ground truth keypoints. Returns: - torch.Tensor: A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels, + (torch.Tensor): A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels, where N is the number of detections. Example: diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 0a79b91a..ae4ed065 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -780,7 +780,7 @@ class AutoBackend(nn.Module): saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle. Args: - p: path to the model file. Defaults to path/to/model.pt + p (str): path to the model file. 
Defaults to path/to/model.pt Examples: >>> model = AutoBackend(weights="path/to/model.onnx") diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index a754f5e7..31f7349f 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -9,7 +9,6 @@ from pathlib import Path import thop import torch -import torch.nn as nn from ultralytics.nn.modules import ( AIFI, @@ -88,7 +87,7 @@ from ultralytics.utils.torch_utils import ( ) -class BaseModel(nn.Module): +class BaseModel(torch.nn.Module): """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.""" def forward(self, x, *args, **kwargs): @@ -151,7 +150,7 @@ class BaseModel(nn.Module): if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) return x @@ -170,12 +169,9 @@ class BaseModel(nn.Module): the provided list. Args: - m (nn.Module): The layer to be profiled. + m (torch.nn.Module): The layer to be profiled. x (torch.Tensor): The input data to the layer. dt (list): A list to store the computation time of the layer. - - Returns: - None """ c = m == self.model[-1] and isinstance(x, list) # is final layer list, copy input as inplace fix flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs @@ -195,7 +191,7 @@ class BaseModel(nn.Module): computation efficiency. Returns: - (nn.Module): The fused model is returned. + (torch.nn.Module): The fused model is returned. """ if not self.is_fused(): for m in self.model.modules(): @@ -229,7 +225,7 @@ class BaseModel(nn.Module): Returns: (bool): True if the number of BatchNorm layers in the model is less than the threshold, False otherwise. 
""" - bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d() + bn = tuple(v for k, v in torch.nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d() return sum(isinstance(v, bn) for v in self.modules()) < thresh # True if < 'thresh' BatchNorm layers in model def info(self, detailed=False, verbose=True, imgsz=640): @@ -304,7 +300,7 @@ class DetectionModel(BaseModel): self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict if self.yaml["backbone"][0][2] == "Silence": LOGGER.warning( - "WARNING ⚠️ YOLOv9 `Silence` module is deprecated in favor of nn.Identity. " + "WARNING ⚠️ YOLOv9 `Silence` module is deprecated in favor of torch.nn.Identity. " "Please delete local *.pt file and re-download the latest model checkpoint." ) self.yaml["backbone"][0][2] = "nn.Identity" @@ -458,20 +454,22 @@ class ClassificationModel(BaseModel): name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1] # last module if isinstance(m, Classify): # YOLO Classify() head if m.linear.out_features != nc: - m.linear = nn.Linear(m.linear.in_features, nc) - elif isinstance(m, nn.Linear): # ResNet, EfficientNet + m.linear = torch.nn.Linear(m.linear.in_features, nc) + elif isinstance(m, torch.nn.Linear): # ResNet, EfficientNet if m.out_features != nc: - setattr(model, name, nn.Linear(m.in_features, nc)) - elif isinstance(m, nn.Sequential): + setattr(model, name, torch.nn.Linear(m.in_features, nc)) + elif isinstance(m, torch.nn.Sequential): types = [type(x) for x in m] - if nn.Linear in types: - i = len(types) - 1 - types[::-1].index(nn.Linear) # last nn.Linear index + if torch.nn.Linear in types: + i = len(types) - 1 - types[::-1].index(torch.nn.Linear) # last torch.nn.Linear index if m[i].out_features != nc: - m[i] = nn.Linear(m[i].in_features, nc) - elif nn.Conv2d in types: - i = len(types) - 1 - types[::-1].index(nn.Conv2d) # last nn.Conv2d index + m[i] = 
torch.nn.Linear(m[i].in_features, nc) + elif torch.nn.Conv2d in types: + i = len(types) - 1 - types[::-1].index(torch.nn.Conv2d) # last torch.nn.Conv2d index if m[i].out_channels != nc: - m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None) + m[i] = torch.nn.Conv2d( + m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None + ) def init_criterion(self): """Initialize the loss criterion for the ClassificationModel.""" @@ -587,7 +585,7 @@ class RTDETRDetectionModel(DetectionModel): if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) head = self.model[-1] @@ -663,7 +661,7 @@ class WorldModel(DetectionModel): if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) return x @@ -684,7 +682,7 @@ class WorldModel(DetectionModel): return self.criterion(preds, batch) -class Ensemble(nn.ModuleList): +class Ensemble(torch.nn.ModuleList): """Ensemble of models.""" def __init__(self): @@ -887,7 +885,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False): for m in ensemble.modules(): if hasattr(m, "inplace"): m.inplace = inplace - elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"): + elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"): m.recompute_scale_factor = None # torch 1.11.0 compatibility 
# Return model @@ -922,7 +920,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False): for m in model.modules(): if hasattr(m, "inplace"): m.inplace = inplace - elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"): + elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model and ckpt @@ -946,7 +944,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) depth, width, max_channels = scales[scale] if act: - Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() + Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = torch.nn.SiLU() if verbose: LOGGER.info(f"{colorstr('activation:')} {act}") # print @@ -982,7 +980,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) C3, C3TR, C3Ghost, - nn.ConvTranspose2d, + torch.nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3, @@ -1048,7 +1046,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) n = 1 elif m is ResNetLayer: c2 = args[1] if args[3] else args[1] * 4 - elif m is nn.BatchNorm2d: + elif m is torch.nn.BatchNorm2d: args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) @@ -1073,7 +1071,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) else: c2 = ch[f] - m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + m_ = torch.nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module t = str(m)[8:-2].replace("__main__.", "") # module type m_.np = sum(x.numel() for x in m_.parameters()) # number params m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type @@ -1084,7 +1082,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) if i == 0: ch = [] ch.append(c2) - return nn.Sequential(*layers), sorted(save) + return torch.nn.Sequential(*layers), 
sorted(save) def yaml_model_load(path): @@ -1126,7 +1124,7 @@ def guess_model_task(model): Guess the task of a PyTorch model from its architecture or configuration. Args: - model (nn.Module | dict): PyTorch model or model configuration in YAML format. + model (torch.nn.Module | dict): PyTorch model or model configuration in YAML format. Returns: (str): Task of the model ('detect', 'segment', 'classify', 'pose'). @@ -1154,7 +1152,7 @@ def guess_model_task(model): with contextlib.suppress(Exception): return cfg2task(model) # Guess from PyTorch model - if isinstance(model, nn.Module): # PyTorch model + if isinstance(model, torch.nn.Module): # PyTorch model for x in "model.args", "model.model.args", "model.model.model.args": with contextlib.suppress(Exception): return eval(x)["task"]