ultralytics 8.2.82 YOLOv10 CoreML, Edge TPU and TF.js export support (#15796)

Signed-off-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Ryan Hirasaki <4690732+RyanHir@users.noreply.github.com>
This commit is contained in:
Glenn Jocher 2024-08-25 23:43:35 +08:00 committed by GitHub
parent dc15242cbd
commit b2604c7df1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 31 additions and 45 deletions

View file

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.2.81"
__version__ = "8.2.82"
import os

View file

@ -8,7 +8,6 @@ import torch
import torch.nn as nn
from torch.nn.init import constant_, xavier_uniform_
from ultralytics.utils import MACOS
from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
@ -133,38 +132,26 @@ class Detect(nn.Module):
@staticmethod
def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
"""
Post-processes the predictions obtained from a YOLOv10 model.
Post-processes YOLO model predictions.
Args:
preds (torch.Tensor): The predictions obtained from the model. It should have a shape of (batch_size, num_boxes, 4 + num_classes).
max_det (int): The maximum number of detections to keep.
nc (int, optional): The number of classes. Defaults to 80.
preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
format [x, y, w, h, class_probs].
max_det (int): Maximum detections per image.
nc (int, optional): Number of classes. Default: 80.
Returns:
(torch.Tensor): The post-processed predictions with shape (batch_size, max_det, 6),
including bounding boxes, scores and cls.
(torch.Tensor): Processed predictions with shape (batch_size, min(max_det, num_anchors), 6) and last
dimension format [x, y, w, h, max_class_prob, class_index].
"""
assert 4 + nc == preds.shape[-1]
batch_size, anchors, predictions = preds.shape # i.e. shape(16,8400,84)
boxes, scores = preds.split([4, nc], dim=-1)
max_scores = scores.amax(dim=-1)
max_scores, index = torch.topk(max_scores, min(max_det, max_scores.shape[1]), axis=-1)
index = index.unsqueeze(-1)
boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
scores = torch.gather(scores, dim=1, index=index.repeat(1, 1, scores.shape[-1]))
# NOTE: simplify result but slightly lower mAP
# scores, labels = scores.max(dim=-1)
# return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
scores, index = torch.topk(scores.flatten(1), max_det, axis=-1)
labels = index % nc
index = index // nc
# Set int64 dtype for MPS and CoreML compatibility to avoid 'gather_along_axis' ops error
if MACOS:
index = index.to(torch.int64)
boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
index = scores.amax(dim=-1).topk(min(max_det, anchors))[1].unsqueeze(-1)
boxes = boxes.gather(dim=1, index=index.repeat(1, 1, 4))
scores = scores.gather(dim=1, index=index.repeat(1, 1, nc))
scores, index = scores.flatten(1).topk(max_det)
i = torch.arange(batch_size)[..., None] # batch indices
return torch.cat([boxes[i, index // nc], scores[..., None], (index % nc)[..., None].float()], dim=-1)
class Segment(Detect):

View file

@ -97,20 +97,17 @@ def benchmark(
assert MACOS or LINUX, "CoreML and TF.js export only supported on macOS and Linux"
assert not IS_RASPBERRYPI, "CoreML and TF.js export not supported on Raspberry Pi"
assert not IS_JETSON, "CoreML and TF.js export not supported on NVIDIA Jetson"
assert not is_end2end, "End-to-end models not supported by CoreML and TF.js yet"
if i in {3, 5}: # CoreML and OpenVINO
assert not IS_PYTHON_3_12, "CoreML and OpenVINO not supported on Python 3.12"
if i in {6, 7, 8}: # TF SavedModel, TF GraphDef, and TFLite
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
if i in {9, 10}: # TF EdgeTPU and TF.js
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
assert not is_end2end, "End-to-end models not supported by TF EdgeTPU and TF.js yet"
if i in {11}: # Paddle
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
if i in {12}: # NCNN
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
assert not is_end2end, "End-to-end models not supported by NCNN yet"
if "cpu" in device.type:
assert cpu, "inference not supported on CPU"
if "cuda" in device.type:
@ -130,6 +127,8 @@ def benchmark(
assert model.task != "pose" or i != 7, "GraphDef Pose inference is not supported"
assert i not in {9, 10}, "inference not supported" # Edge TPU and TF.js are unsupported
assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13" # CoreML
if i in {12}:
assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half)
# Validate