ultralytics 8.2.82 YOLOv10 CoreML, Edge TPU and TF.js export support (#15796)

Signed-off-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Ryan Hirasaki <4690732+RyanHir@users.noreply.github.com>
2024-08-25 23:43:35 +08:00 · 2024-08-25 23:43:35 +08:00 · b2604c7df1
commit b2604c7df1
parent dc15242cbd
4 changed files with 31 additions and 45 deletions
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.2.81"
+__version__ = "8.2.82"

 import os

--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -8,7 +8,6 @@ import torch
 import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_

-from ultralytics.utils import MACOS
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors

 from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
@@ -133,38 +132,26 @@ class Detect(nn.Module):
    @staticmethod
    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
        """
-        Post-processes the predictions obtained from a YOLOv10 model.
+        Post-processes YOLO model predictions.

        Args:
-            preds (torch.Tensor): The predictions obtained from the model. It should have a shape of (batch_size, num_boxes, 4 + num_classes).
-            max_det (int): The maximum number of detections to keep.
-            nc (int, optional): The number of classes. Defaults to 80.
+            preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
+                format [x, y, w, h, class_probs].
+            max_det (int): Maximum detections per image.
+            nc (int, optional): Number of classes. Default: 80.

        Returns:
-            (torch.Tensor): The post-processed predictions with shape (batch_size, max_det, 6),
-                including bounding boxes, scores and cls.
+            (torch.Tensor): Processed predictions with shape (batch_size, min(max_det, num_anchors), 6) and last
+                dimension format [x, y, w, h, max_class_prob, class_index].
        """
-        assert 4 + nc == preds.shape[-1]
+        batch_size, anchors, predictions = preds.shape  # i.e. shape(16,8400,84)
        boxes, scores = preds.split([4, nc], dim=-1)
-        max_scores = scores.amax(dim=-1)
-        max_scores, index = torch.topk(max_scores, min(max_det, max_scores.shape[1]), axis=-1)
-        index = index.unsqueeze(-1)
-        boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
-        scores = torch.gather(scores, dim=1, index=index.repeat(1, 1, scores.shape[-1]))
-
-        # NOTE: simplify result but slightly lower mAP
-        # scores, labels = scores.max(dim=-1)
-        # return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
-
-        scores, index = torch.topk(scores.flatten(1), max_det, axis=-1)
-        labels = index % nc
-        index = index // nc
-        # Set int64 dtype for MPS and CoreML compatibility to avoid 'gather_along_axis' ops error
-        if MACOS:
-            index = index.to(torch.int64)
-        boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
-
-        return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
+        index = scores.amax(dim=-1).topk(min(max_det, anchors))[1].unsqueeze(-1)
+        boxes = boxes.gather(dim=1, index=index.repeat(1, 1, 4))
+        scores = scores.gather(dim=1, index=index.repeat(1, 1, nc))
+        scores, index = scores.flatten(1).topk(max_det)
+        i = torch.arange(batch_size)[..., None]  # batch indices
+        return torch.cat([boxes[i, index // nc], scores[..., None], (index % nc)[..., None].float()], dim=-1)


 class Segment(Detect):
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@@ -97,20 +97,17 @@ def benchmark(
                assert MACOS or LINUX, "CoreML and TF.js export only supported on macOS and Linux"
                assert not IS_RASPBERRYPI, "CoreML and TF.js export not supported on Raspberry Pi"
                assert not IS_JETSON, "CoreML and TF.js export not supported on NVIDIA Jetson"
-                assert not is_end2end, "End-to-end models not supported by CoreML and TF.js yet"
            if i in {3, 5}:  # CoreML and OpenVINO
                assert not IS_PYTHON_3_12, "CoreML and OpenVINO not supported on Python 3.12"
            if i in {6, 7, 8}:  # TF SavedModel, TF GraphDef, and TFLite
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
            if i in {9, 10}:  # TF EdgeTPU and TF.js
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
-                assert not is_end2end, "End-to-end models not supported by TF EdgeTPU and TF.js yet"
            if i in {11}:  # Paddle
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
            if i in {12}:  # NCNN
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by NCNN yet"
            if "cpu" in device.type:
                assert cpu, "inference not supported on CPU"
            if "cuda" in device.type:
@@ -130,6 +127,8 @@ def benchmark(
            assert model.task != "pose" or i != 7, "GraphDef Pose inference is not supported"
            assert i not in {9, 10}, "inference not supported"  # Edge TPU and TF.js are unsupported
            assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13"  # CoreML
+            if i in {12}:
+                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
            exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half)

            # Validate