update: adapt for Huawei Ascend (昇腾) NPU

Author: Kare-Udon, 2025-11-27 09:57:34 +00:00 (committed via GitHub)
Commit 696c1b0793, parent e74b035c02
5 changed files with 34 additions and 11 deletions

@@ -158,7 +158,12 @@ class BaseValidator:
             self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)

             model.eval()
-            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup
+            # model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup
+            # Added warmup passes to keep the performance numbers accurate
+            print('start warm up')
+            model.warmup(imgsz=(self.args.batch, 3, imgsz, imgsz))  # warmup
+            model.warmup(imgsz=(self.args.batch, 3, 288, imgsz))  # warmup
+            print('end warm up')

        self.run_callbacks("on_val_start")
        dt = (
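The second warmup at height 288 is presumably there to exercise the dynamic-shape path of the compiled graph before timing begins. As a generic illustration (plain PyTorch, not the ultralytics warmup API; shapes and iteration counts are illustrative assumptions), the warmup-before-timing pattern looks like this:

import time
import torch

def measure_latency(model, x, warmup_iters=3, timed_iters=10):
    """Run a few untimed passes first so compilation/caching cost is excluded from the measurement."""
    model.eval()
    with torch.no_grad():
        for _ in range(warmup_iters):   # warmup: triggers lazy graph compilation and allocator warm-up
            model(x)
        # NOTE: on accelerator devices (CUDA/NPU) a device synchronize around the timer is also needed
        t0 = time.perf_counter()
        for _ in range(timed_iters):
            model(x)
        return (time.perf_counter() - t0) / timed_iters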


@@ -12,6 +12,10 @@ import numpy as np
 import torch
 import torch.nn as nn
 from PIL import Image
+import torch_npu
+from torch_npu.contrib import transfer_to_npu
+import torchair as tng
+from torchair.configs.compiler_config import CompilerConfig

 from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
@@ -156,6 +160,12 @@ class AutoBackend(nn.Module):
             names = model.module.names if hasattr(model, "module") else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+            # torchair adaptation: compile the model with the NPU graph backend
+            config = CompilerConfig()
+            config.experimental_config.frozen_parameter = True
+            npu_backend = tng.get_npu_backend(compiler_config=config)
+            model = torch.compile(model, dynamic=True, fullgraph=True, backend=npu_backend)
+            tng.use_internal_format_weight(model.model)
             pt = True

        # PyTorch
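For reference, a self-contained sketch of the same torch_npu/torchair flow outside AutoBackend. It assumes a working CANN environment with torch_npu and torchair installed; the Conv2d stand-in and the input shape are illustrative, and use_internal_format_weight is applied to the eager module here rather than to model.model:

import torch
import torch_npu                                     # registers the "npu" device with PyTorch
from torch_npu.contrib import transfer_to_npu        # maps CUDA-style calls onto the NPU
import torchair as tng
from torchair.configs.compiler_config import CompilerConfig

model = torch.nn.Conv2d(3, 16, 3, padding=1).eval().npu()   # stand-in; the commit compiles the full YOLO model
config = CompilerConfig()
config.experimental_config.frozen_parameter = True           # treat weights as frozen constants in the graph
npu_backend = tng.get_npu_backend(compiler_config=config)
compiled = torch.compile(model, dynamic=True, fullgraph=True, backend=npu_backend)
tng.use_internal_format_weight(model)                        # convert weights to the NPU-internal storage format

with torch.no_grad():
    out = compiled(torch.randn(1, 3, 640, 640).npu())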


@@ -185,7 +185,10 @@ class SPPF(nn.Module):
     def forward(self, x):
         """Forward pass through Ghost Convolution block."""
         y = [self.cv1(x)]
-        y.extend(self.m(y[-1]) for _ in range(3))
+        # y.extend(self.m(y[-1]) for _ in range(3))
+        for _ in range(3):
+            o1 = self.m(y[-1])
+            y.extend(o1.unsqueeze(0))
         return self.cv2(torch.cat(y, 1))
@@ -236,7 +239,11 @@ class C2f(nn.Module):
     def forward(self, x):
         """Forward pass through C2f layer."""
         y = list(self.cv1(x).chunk(2, 1))
-        y.extend(m(y[-1]) for m in self.m)
+        # y.extend(m(y[-1]) for m in self.m)
+        # This generator expression behaves differently under eager torch and dynamo, so it is rewritten as an explicit loop
+        for m in self.m:
+            o1 = m(y[-1])
+            y.extend(o1.unsqueeze(0))
         return self.cv2(torch.cat(y, 1))

     def forward_split(self, x):
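Note that list.extend() consumes the generator lazily, so the original one-liner is already recursive (each iteration sees the previously appended output); the explicit loop just makes that control flow visible to dynamo. A small equivalence check, using a plain Conv2d as a stand-in for the real SPPF/C2f sub-modules:

import torch
import torch.nn as nn

m = nn.Conv2d(8, 8, 3, padding=1).eval()
x = torch.randn(2, 8, 16, 16)

with torch.no_grad():
    # original form: extend() consumes the generator item by item, so y1[-1] is updated between iterations
    y1 = [x]
    y1.extend(m(y1[-1]) for _ in range(3))

    # rewritten form from the commit: explicit loop; extending over a size-1 leading dim appends the tensor itself
    y2 = [x]
    for _ in range(3):
        o1 = m(y2[-1])
        y2.extend(o1.unsqueeze(0))

assert all(torch.equal(a, b) for a, b in zip(y1, y2))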


@@ -102,9 +102,9 @@ class Detect(nn.Module):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.format != "imx" and (self.dynamic or self.shape != shape):
-            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
-            self.shape = shape
+        anchors, strides = make_anchors(x, self.stride, 0.5)
+        anchors = anchors.transpose(0, 1)
+        strides = strides.transpose(0, 1)

         if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
@@ -118,15 +118,15 @@ class Detect(nn.Module):
             grid_h = shape[2]
             grid_w = shape[3]
             grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+            norm = strides / (self.stride[0] * grid_size)
+            dbox = self.decode_bboxes(self.dfl(box) * norm, anchors.unsqueeze(0) * norm[:, :2])
         elif self.export and self.format == "imx":
             dbox = self.decode_bboxes(
-                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+                self.dfl(box) * strides, anchors.unsqueeze(0) * strides, xywh=False
             )
             return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
         else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
+            dbox = self.decode_bboxes(self.dfl(box), anchors.unsqueeze(0)) * strides
         return torch.cat((dbox, cls.sigmoid()), 1)
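Since anchors and strides are now recomputed on every forward instead of being cached on the module, the shapes the head works with are exactly those returned by make_anchors. A quick sketch, assuming a 640x640 input with the usual strides (8, 16, 32) and the current import path for make_anchors:

import torch
from ultralytics.utils.tal import make_anchors

# three feature maps for a 640x640 input: 80x80, 40x40, 20x20
feats = [torch.zeros(1, 64, s, s) for s in (80, 40, 20)]
anchors, strides = make_anchors(feats, torch.tensor([8.0, 16.0, 32.0]), 0.5)

print(anchors.shape)                   # torch.Size([8400, 2])
print(strides.shape)                   # torch.Size([8400, 1])
print(anchors.transpose(0, 1).shape)   # torch.Size([2, 8400]), the transposed layout used in Detect.forward
print(strides.transpose(0, 1).shape)   # torch.Size([1, 8400])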


@@ -341,7 +341,8 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
         sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
         sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
         anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
-        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
+        # stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
+        stride_tensor.append(torch.ones((h * w, 1), dtype=dtype, device=device) * stride)
     return torch.cat(anchor_points), torch.cat(stride_tensor)
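The two constructions produce identical tensors; the change only swaps torch.full for ones() times a scalar, presumably because the graph backend traces the latter more cleanly. A trivial check with illustrative values:

import torch

h, w, stride = 80, 80, 8.0
a = torch.full((h * w, 1), stride, dtype=torch.float32)
b = torch.ones((h * w, 1), dtype=torch.float32) * stride
assert torch.equal(a, b)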