Revert "Support CoreML NMS export for Segment, Pose and OBB" (#19273)

Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Glenn Jocher 2025-02-18 19:45:14 +08:00 committed by GitHub
parent 7397aa7079
commit e981fc629d
6 changed files with 167 additions and 20 deletions

docs/en/macros/export-table.md

@@ -1,4 +1,5 @@
 {%set tip1 = ':material-information-outline:{ title="conf, iou, agnostic_nms are also available when nms=True" }' %}
+{%set tip2 = ':material-information-outline:{ title="conf, iou are also available when nms=True" }' %}

 | Format                                             | `format` Argument | Model                                           | Metadata | Arguments                                                                                       |
 | ------------------------------------------------- | ----------------- | ----------------------------------------------- | -------- | --------------------------------------------------------------------------------------------- |
@@ -7,7 +8,7 @@
 | [ONNX](../integrations/onnx.md)                   | `onnx`            | `{{ model_name or "yolo11n" }}.onnx`            | ✅       | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `nms`{{ tip1 }}, `batch`                     |
 | [OpenVINO](../integrations/openvino.md)           | `openvino`        | `{{ model_name or "yolo11n" }}_openvino_model/` | ✅       | `imgsz`, `half`, `dynamic`, `int8`, `nms`{{ tip1 }}, `batch`, `data`                          |
 | [TensorRT](../integrations/tensorrt.md)           | `engine`          | `{{ model_name or "yolo11n" }}.engine`          | ✅       | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `nms`{{ tip1 }}, `batch`, `data` |
-| [CoreML](../integrations/coreml.md)               | `coreml`          | `{{ model_name or "yolo11n" }}.mlpackage`       | ✅       | `imgsz`, `half`, `int8`, `nms`{{ tip1 }}, `batch`                                             |
+| [CoreML](../integrations/coreml.md)               | `coreml`          | `{{ model_name or "yolo11n" }}.mlpackage`       | ✅       | `imgsz`, `half`, `int8`, `nms`{{ tip2 }}, `batch`                                             |
 | [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model`     | `{{ model_name or "yolo11n" }}_saved_model/`    | ✅       | `imgsz`, `keras`, `int8`, `nms`{{ tip1 }}, `batch`                                            |
 | [TF GraphDef](../integrations/tf-graphdef.md)     | `pb`              | `{{ model_name or "yolo11n" }}.pb`              | ❌       | `imgsz`, `batch`                                                                              |
 | [TF Lite](../integrations/tflite.md)              | `tflite`          | `{{ model_name or "yolo11n" }}.tflite`          | ✅       | `imgsz`, `half`, `int8`, `nms`{{ tip1 }}, `batch`, `data`                                     |
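For context, the restored CoreML behavior is exercised with a standard export call; a minimal sketch (not part of this commit), assuming a Detect checkpoint, since the exporter change below warns that `nms=True` is Detect-only for CoreML:

```python
# Minimal sketch: CoreML export with the restored NMS pipeline.
# conf/iou are honored per tip2 above; agnostic_nms is not (unlike tip1 formats).
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # Detect model; Segment/Pose/OBB fall back with a warning
model.export(format="coreml", nms=True, conf=0.25, iou=0.7)  # -> yolo11n.mlpackage
```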

docs/en/reference/engine/exporter.md

@@ -15,6 +15,10 @@ keywords: YOLOv8, export formats, ONNX, TensorRT, CoreML, machine learning model
 <br><br><hr><br>

+## ::: ultralytics.engine.exporter.IOSDetectModel
+
+<br><br><hr><br>
+
 ## ::: ultralytics.engine.exporter.NMSModel

 <br><br><hr><br>

tests/test_exports.py

@@ -116,16 +116,14 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
 @pytest.mark.skipif(not TORCH_1_9, reason="CoreML>=7.2 not supported with PyTorch<=1.8")
 @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="CoreML not supported in Python 3.12")
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, batch, nms",
+    "task, dynamic, int8, half, batch",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, batch, nms)
-        for task, dynamic, int8, half, batch, nms in product(
-            TASKS, [False], [True, False], [True, False], [1], [True, False]
-        )
-        if not ((int8 and half) or (task == "classify" and nms))
+        (task, dynamic, int8, half, batch)
+        for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1])
+        if not (int8 and half)
     ],
 )
-def test_export_coreml_matrix(task, dynamic, int8, half, batch, nms):
+def test_export_coreml_matrix(task, dynamic, int8, half, batch):
     """Test YOLO exports to CoreML format with various parameter configurations."""
     file = YOLO(TASK2MODEL[task]).export(
         format="coreml",
@@ -134,7 +132,6 @@ def test_export_coreml_matrix(task, dynamic, int8, half, batch):
         int8=int8,
         half=half,
         batch=batch,
-        nms=nms,
     )
     YOLO(file)([SOURCE] * batch, imgsz=32)  # exported model inference at batch=3
     shutil.rmtree(file)  # cleanup
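The restored grid is easy to enumerate standalone; a sketch, with `TASKS` assumed here (the test suite imports its own constant):

```python
# Sketch of the restored CoreML test matrix: every task with all (int8, half)
# pairs except both quantization flags at once.
from itertools import product

TASKS = ["detect", "segment", "classify", "pose", "obb"]  # assumed task list
combos = [
    (task, dynamic, int8, half, batch)
    for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1])
    if not (int8 and half)  # int8 + half together is excluded
]
print(len(combos))  # 15 = 5 tasks x 3 allowed (int8, half) pairs
```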

ultralytics/engine/exporter.py

@@ -84,6 +84,7 @@ from ultralytics.utils import (
     LINUX,
     LOGGER,
     MACOS,
+    PYTHON_VERSION,
     RKNN_CHIPS,
     ROOT,
     WINDOWS,
@@ -355,7 +356,7 @@ class Exporter:
         y = None
         for _ in range(2):  # dry runs
-            y = NMSModel(model, self.args)(im) if self.args.nms else model(im)
+            y = NMSModel(model, self.args)(im) if self.args.nms and not coreml else model(im)
         if self.args.half and onnx and self.device.type != "cpu":
             im, model = im.half(), model.half()  # to FP16
@@ -765,9 +766,12 @@ class Exporter:
         if self.model.task == "classify":
             classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None
             model = self.model
-        elif self.args.nms:
-            model = NMSModel(self.model, self.args)
+        elif self.model.task == "detect":
+            model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model
         else:
+            if self.args.nms:
+                LOGGER.warning(f"{prefix} WARNING ⚠️ 'nms=True' is only available for Detect models like 'yolo11n.pt'.")
+                # TODO CoreML Segment and Pose model pipelining
             model = self.model
         ts = torch.jit.trace(model.eval(), self.im, strict=False)  # TorchScript model
@@ -789,6 +793,15 @@ class Exporter:
                 op_config = cto.OpPalettizerConfig(mode="kmeans", nbits=bits, weight_threshold=512)
                 config = cto.OptimizationConfig(global_config=op_config)
                 ct_model = cto.palettize_weights(ct_model, config=config)
+        if self.args.nms and self.model.task == "detect":
+            if mlmodel:
+                # coremltools<=6.2 NMS export requires Python<3.11
+                check_version(PYTHON_VERSION, "<3.11", name="Python ", hard=True)
+                weights_dir = None
+            else:
+                ct_model.save(str(f))  # save otherwise weights_dir does not exist
+                weights_dir = str(f / "Data/com.apple.CoreML/weights")
+            ct_model = self._pipeline_coreml(ct_model, weights_dir=weights_dir)
         m = self.metadata  # metadata dict
         ct_model.short_description = m.pop("description")
@@ -1378,6 +1391,112 @@ class Exporter:
         populator.populate()
         tmp_file.unlink()

+    def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
+        """YOLO CoreML pipeline."""
+        import coremltools as ct  # noqa
+
+        LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...")
+        _, _, h, w = list(self.im.shape)  # BCHW
+
+        # Output shapes
+        spec = model.get_spec()
+        out0, out1 = iter(spec.description.output)
+        if MACOS:
+            from PIL import Image
+
+            img = Image.new("RGB", (w, h))  # w=192, h=320
+            out = model.predict({"image": img})
+            out0_shape = out[out0.name].shape  # (3780, 80)
+            out1_shape = out[out1.name].shape  # (3780, 4)
+        else:  # linux and windows can not run model.predict(), get sizes from PyTorch model output y
+            out0_shape = self.output_shape[2], self.output_shape[1] - 4  # (3780, 80)
+            out1_shape = self.output_shape[2], 4  # (3780, 4)
+
+        # Checks
+        names = self.metadata["names"]
+        nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
+        _, nc = out0_shape  # number of anchors, number of classes
+        assert len(names) == nc, f"{len(names)} names found for nc={nc}"  # check
+
+        # Define output shapes (missing)
+        out0.type.multiArrayType.shape[:] = out0_shape  # (3780, 80)
+        out1.type.multiArrayType.shape[:] = out1_shape  # (3780, 4)
+
+        # Model from spec
+        model = ct.models.MLModel(spec, weights_dir=weights_dir)
+
+        # 3. Create NMS protobuf
+        nms_spec = ct.proto.Model_pb2.Model()
+        nms_spec.specificationVersion = 5
+        for i in range(2):
+            decoder_output = model._spec.description.output[i].SerializeToString()
+            nms_spec.description.input.add()
+            nms_spec.description.input[i].ParseFromString(decoder_output)
+            nms_spec.description.output.add()
+            nms_spec.description.output[i].ParseFromString(decoder_output)
+
+        nms_spec.description.output[0].name = "confidence"
+        nms_spec.description.output[1].name = "coordinates"
+
+        output_sizes = [nc, 4]
+        for i in range(2):
+            ma_type = nms_spec.description.output[i].type.multiArrayType
+            ma_type.shapeRange.sizeRanges.add()
+            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
+            ma_type.shapeRange.sizeRanges[0].upperBound = -1
+            ma_type.shapeRange.sizeRanges.add()
+            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
+            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
+            del ma_type.shape[:]
+
+        nms = nms_spec.nonMaximumSuppression
+        nms.confidenceInputFeatureName = out0.name  # 1x507x80
+        nms.coordinatesInputFeatureName = out1.name  # 1x507x4
+        nms.confidenceOutputFeatureName = "confidence"
+        nms.coordinatesOutputFeatureName = "coordinates"
+        nms.iouThresholdInputFeatureName = "iouThreshold"
+        nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
+        nms.iouThreshold = self.args.iou
+        nms.confidenceThreshold = self.args.conf
+        nms.pickTop.perClass = True
+        nms.stringClassLabels.vector.extend(names.values())
+        nms_model = ct.models.MLModel(nms_spec)
+
+        # 4. Pipeline models together
+        pipeline = ct.models.pipeline.Pipeline(
+            input_features=[
+                ("image", ct.models.datatypes.Array(3, ny, nx)),
+                ("iouThreshold", ct.models.datatypes.Double()),
+                ("confidenceThreshold", ct.models.datatypes.Double()),
+            ],
+            output_features=["confidence", "coordinates"],
+        )
+        pipeline.add_model(model)
+        pipeline.add_model(nms_model)
+
+        # Correct datatypes
+        pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString())
+        pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
+        pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())
+
+        # Update metadata
+        pipeline.spec.specificationVersion = 5
+        pipeline.spec.description.metadata.userDefined.update(
+            {"IoU threshold": str(nms.iouThreshold), "Confidence threshold": str(nms.confidenceThreshold)}
+        )
+
+        # Save the model
+        model = ct.models.MLModel(pipeline.spec, weights_dir=weights_dir)
+        model.input_description["image"] = "Input image"
+        model.input_description["iouThreshold"] = f"(optional) IoU threshold override (default: {nms.iouThreshold})"
+        model.input_description["confidenceThreshold"] = (
+            f"(optional) Confidence threshold override (default: {nms.confidenceThreshold})"
+        )
+        model.output_description["confidence"] = 'Boxes × Class confidence (see user-defined metadata "classes")'
+        model.output_description["coordinates"] = "Boxes × [x, y, width, height] (relative to image size)"
+        LOGGER.info(f"{prefix} pipeline success")
+        return model
+
     def add_callback(self, event: str, callback):
         """Appends the given callback."""
         self.callbacks[event].append(callback)
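The restored pipeline exposes `confidence`/`coordinates` outputs plus optional threshold inputs; a minimal usage sketch, assuming a completed `nms=True` Detect export (`model.predict` runs on macOS only, as noted in `_pipeline_coreml`):

```python
# Sketch: querying a pipelined CoreML export directly with coremltools.
# "yolo11n.mlpackage" is a hypothetical path from a prior nms=True export.
import coremltools as ct
from PIL import Image

model = ct.models.MLModel("yolo11n.mlpackage")
img = Image.new("RGB", (640, 640))  # stand-in image, sized to the export imgsz
out = model.predict({"image": img, "iouThreshold": 0.7, "confidenceThreshold": 0.25})
print(out["confidence"].shape)   # (num_boxes, num_classes) per-class scores
print(out["coordinates"].shape)  # (num_boxes, 4) xywh, relative to image size
```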
@@ -1388,6 +1507,26 @@ class Exporter:
             callback(self)


+class IOSDetectModel(torch.nn.Module):
+    """Wrap an Ultralytics YOLO model for Apple iOS CoreML export."""
+
+    def __init__(self, model, im):
+        """Initialize the IOSDetectModel class with a YOLO model and example image."""
+        super().__init__()
+        _, _, h, w = im.shape  # batch, channel, height, width
+        self.model = model
+        self.nc = len(model.names)  # number of classes
+        if w == h:
+            self.normalize = 1.0 / w  # scalar
+        else:
+            self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h])  # broadcast (slower, smaller)
+
+    def forward(self, x):
+        """Normalize predictions of object detection model with input size-dependent factors."""
+        xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
+        return cls, xywh * self.normalize  # confidence (3780, 80), coordinates (3780, 4)
+
+
 class NMSModel(torch.nn.Module):
     """Model wrapper with embedded NMS for Detect, Segment, Pose and OBB."""

@@ -1446,8 +1585,7 @@ class NMSModel(torch.nn.Module):
             box = xywh2xyxy(box)
             if self.is_tf:
                 # TFlite bug returns less boxes
-                pad = torch.zeros((mask.shape[0] - box.shape[0], box.shape[-1]), device=box.device, dtype=box.dtype)
-                box = torch.cat((box, pad))
+                box = torch.nn.functional.pad(box, (0, 0, 0, mask.shape[0] - box.shape[0]))
             nmsbox = box.clone()
             # `8` is the minimum value experimented to get correct NMS results for obb
             multiplier = 8 if self.obb else 1
@@ -1484,6 +1622,6 @@ class NMSModel(torch.nn.Module):
                 [box[keep], score[keep].view(-1, 1), cls[keep].view(-1, 1).to(out.dtype), extra[keep]], dim=-1
             )
             # Zero-pad to max_det size to avoid reshape error
-            pad = torch.zeros((self.args.max_det - dets.shape[0], out.shape[-1]), device=out.device, dtype=out.dtype)
-            out[i] = torch.cat((dets, pad))
+            pad = (0, 0, 0, self.args.max_det - dets.shape[0])
+            out[i] = torch.nn.functional.pad(dets, pad)
         return (out, preds[1]) if self.model.task == "segment" else out
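`IOSDetectModel` (restored above) only splits the raw detection head output and rescales boxes to the unit square; a standalone sketch of that transform with a dummy tensor (shapes assumed for illustration):

```python
# Sketch of the IOSDetectModel forward math on a dummy (1, 84, 8400) output:
# 4 box coordinates + 80 class scores per anchor, square 640-pixel input.
import torch

nc, na, w = 80, 8400, 640  # classes, anchors, input width
raw = torch.rand(1, 4 + nc, na)  # stand-in for self.model(x)[0] pre-indexing
xywh, cls = raw[0].transpose(0, 1).split((4, nc), 1)  # (8400, 4), (8400, 80)
coordinates = xywh * (1.0 / w)  # square input -> scalar normalize factor
print(cls.shape, coordinates.shape)  # confidence then coordinates, as CoreML expects
```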

ultralytics/nn/autobackend.py

@@ -640,10 +640,14 @@ class AutoBackend(nn.Module):
             y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
             if "confidence" in y:
                 raise TypeError(
-                    "'model={w}' has an NMS pipeline created by an older version of Ultralytics. "
-                    "CoreML inference with NMS is only supported for models exported with latest Ultralytics. "
-                    "You may export the model again with latest Ultralytics to resolve this."
+                    "Ultralytics only supports inference of non-pipelined CoreML models exported with "
+                    f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export."
                 )
+                # TODO: CoreML NMS inference handling
+                # from ultralytics.utils.ops import xywh2xyxy
+                # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+                # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
+                # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
             y = list(y.values())
             if len(y) == 2 and len(y[1].shape) != 4:  # segmentation model
                 y = list(reversed(y))  # reversed for segmentation models (pred, proto)
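The commented-out TODO above already sketches the decode path; a self-contained version for reference (the function name and dict layout are illustrative, not library API):

```python
# Sketch expanding the TODO above: turn pipelined CoreML NMS outputs into the
# familiar (N, 6) [x1, y1, x2, y2, conf, cls] array.
import numpy as np

def decode_coreml_nms(y: dict, w: int, h: int) -> np.ndarray:
    """y holds 'coordinates' (N, 4) normalized xywh and 'confidence' (N, nc)."""
    xywh = y["coordinates"] * np.array([[w, h, w, h]])  # denormalize to pixels
    xy, half_wh = xywh[:, :2], xywh[:, 2:] / 2
    box = np.concatenate((xy - half_wh, xy + half_wh), 1)  # xywh -> xyxy
    conf = y["confidence"].max(1)  # best score per box
    cls = y["confidence"].argmax(1).astype(np.float32)  # best class index per box
    return np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
```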

ultralytics/utils/ops.py

@@ -441,9 +441,12 @@ def xywh2xyxy(x):
         y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
+    y = empty_like(x)  # faster than clone/copy
     xy = x[..., :2]  # centers
     wh = x[..., 2:] / 2  # half width-height
-    return (np.concatenate if isinstance(x, np.ndarray) else torch.cat)((xy - wh, xy + wh), -1)
+    y[..., :2] = xy - wh  # top left xy
+    y[..., 2:] = xy + wh  # bottom right xy
+    return y


 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
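A quick numeric check of the preallocated conversion above (`torch.empty_like` stands in for the Ultralytics `empty_like` helper, which per the docstring must also handle NumPy inputs):

```python
# Sketch: the new in-place fill matches the old concatenate-based conversion.
import torch

x = torch.tensor([[50.0, 50.0, 20.0, 10.0]])  # cx, cy, w, h
y = torch.empty_like(x)
xy, wh = x[..., :2], x[..., 2:] / 2
y[..., :2] = xy - wh  # top-left corner
y[..., 2:] = xy + wh  # bottom-right corner
old = torch.cat((xy - wh, xy + wh), -1)  # previous implementation
assert torch.equal(y, old)
print(y)  # tensor([[40., 45., 60., 55.]])
```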