# Patch: adapt Ultralytics inference to torchair graph compilation (torch.compile with the NPU backend).
#   validator.py   - warm up at the full batch size for two input shapes before validation.
#   autobackend.py - compile the in-memory PyTorch model with the torchair NPU backend.
#   block.py       - SPPF/C2f forward use an explicit loop instead of a generator passed to extend().
#   head.py        - Detect recomputes anchors/strides on every call instead of caching them on self.
#   tal.py         - make_anchors builds the stride tensor as ones * stride instead of torch.full.
# NOTE(review): the post-image blob ids on the "index" lines are those of the originally generated patch.
diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py
index 42c0e329..0f46395e 100644
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@@ -158,7 +158,11 @@ class BaseValidator:
             self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)
 
             model.eval()
-            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup
+            # Extra warmup passes at the full batch size so that performance measurements are accurate.
+            # NOTE(review): the second pass hard-codes a height of 288 - confirm this matches the evaluated shape.
+            print('start warm up')
+            model.warmup(imgsz=(self.args.batch, 3, imgsz, imgsz))  # warmup
+            model.warmup(imgsz=(self.args.batch, 3, 288, imgsz))  # warmup
+            print('end warm up')
 
         self.run_callbacks("on_val_start")
         dt = (
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 7629fb7c..36ce75d6 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -12,6 +12,10 @@ import numpy as np
 import torch
 import torch.nn as nn
 from PIL import Image
+import torch_npu
+from torch_npu.contrib import transfer_to_npu
+import torchair as tng
+from torchair.configs.compiler_config import CompilerConfig
 
 from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
@@ -156,6 +160,12 @@ class AutoBackend(nn.Module):
             names = model.module.names if hasattr(model, "module") else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+            # torchair adaptation: compile the model as a full graph with the NPU backend
+            config = CompilerConfig()
+            config.experimental_config.frozen_parameter = True
+            npu_backend = tng.get_npu_backend(compiler_config=config)
+            model = torch.compile(model, dynamic=True, fullgraph=True, backend=npu_backend)
+            tng.use_internal_format_weight(model.model)
             pt = True
 
         # PyTorch
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index 567e1aaf..a7cf8dd5 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -185,7 +185,9 @@ class SPPF(nn.Module):
     def forward(self, x):
         """Forward pass through Ghost Convolution block."""
         y = [self.cv1(x)]
-        y.extend(self.m(y[-1]) for _ in range(3))
+        # Explicit loop: each step must see the output appended by the previous step.
+        for _ in range(3):
+            y.append(self.m(y[-1]))
         return self.cv2(torch.cat(y, 1))
 
 
@@ -236,7 +238,9 @@ class C2f(nn.Module):
     def forward(self, x):
         """Forward pass through C2f layer."""
         y = list(self.cv1(x).chunk(2, 1))
-        y.extend(m(y[-1]) for m in self.m)
+        # The generator form is evaluated differently by eager torch and dynamo, so use an explicit loop.
+        for m in self.m:
+            y.append(m(y[-1]))
         return self.cv2(torch.cat(y, 1))
 
     def forward_split(self, x):
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index b9d05024..7906afa5 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -102,9 +102,9 @@ class Detect(nn.Module):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.format != "imx" and (self.dynamic or self.shape != shape):
-            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
-            self.shape = shape
+        anchors, strides = make_anchors(x, self.stride, 0.5)
+        anchors = anchors.transpose(0, 1)
+        strides = strides.transpose(0, 1)
 
         if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
@@ -118,15 +118,15 @@ class Detect(nn.Module):
             grid_h = shape[2]
             grid_w = shape[3]
             grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+            norm = strides / (self.stride[0] * grid_size)
+            dbox = self.decode_bboxes(self.dfl(box) * norm, anchors.unsqueeze(0) * norm[:, :2])
         elif self.export and self.format == "imx":
             dbox = self.decode_bboxes(
-                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+                self.dfl(box) * strides, anchors.unsqueeze(0) * strides, xywh=False
             )
             return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
         else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
+            dbox = self.decode_bboxes(self.dfl(box), anchors.unsqueeze(0)) * strides
 
         return torch.cat((dbox, cls.sigmoid()), 1)
 
diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py
index e4a40f5e..2ded54be 100644
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@@ -341,7 +341,8 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
         sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
         sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
         anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
-        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
+        # Same (h * w, 1) constant tensor as torch.full(..., stride, ...), built as ones * stride.
+        stride_tensor.append(torch.ones((h * w, 1), dtype=dtype, device=device) * stride)
     return torch.cat(anchor_points), torch.cat(stride_tensor)
 
 