ultralytics 8.3.67 NMS Export for Detect, Segment, Pose and OBB YOLO models (#18484)

Signed-off-by: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com>
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
This commit is contained in:
Mohammed Yasin 2025-01-24 18:00:36 +08:00 committed by GitHub
parent 0e48a00303
commit 9181ff62f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 320 additions and 208 deletions

View file

@ -38,13 +38,7 @@ class NASValidator(DetectionValidator):
"""Apply Non-maximum suppression to prediction outputs."""
boxes = ops.xyxy2xywh(preds_in[0][0])
preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1)
return ops.non_max_suppression(
return super().postprocess(
preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=False,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
max_time_img=0.5,
)

View file

@ -20,22 +20,54 @@ class DetectionPredictor(BasePredictor):
```
"""
def postprocess(self, preds, img, orig_imgs):
def postprocess(self, preds, img, orig_imgs, **kwargs):
"""Post-processes predictions and returns a list of Results objects."""
preds = ops.non_max_suppression(
preds,
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
self.args.classes,
self.args.agnostic_nms,
max_det=self.args.max_det,
classes=self.args.classes,
nc=len(self.model.names),
end2end=getattr(self.model, "end2end", False),
rotated=self.args.task == "obb",
)
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
results = []
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
return results
return self.construct_results(preds, img, orig_imgs, **kwargs)
def construct_results(self, preds, img, orig_imgs):
"""
Constructs a list of result objects from the predictions.
Args:
preds (List[torch.Tensor]): List of predicted bounding boxes and scores.
img (torch.Tensor): The image after preprocessing.
orig_imgs (List[np.ndarray]): List of original images before preprocessing.
Returns:
(list): List of result objects containing the original images, image paths, class names, and bounding boxes.
"""
return [
self.construct_result(pred, img, orig_img, img_path)
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
]
def construct_result(self, pred, img, orig_img, img_path):
"""
Constructs the result object from the prediction.
Args:
pred (torch.Tensor): The predicted bounding boxes and scores.
img (torch.Tensor): The image after preprocessing.
orig_img (np.ndarray): The original image before preprocessing.
img_path (str): The path to the original image.
Returns:
(Results): The result object containing the original image, image path, class names, and bounding boxes.
"""
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6])

View file

@ -78,6 +78,7 @@ class DetectionValidator(BaseValidator):
self.args.save_json |= self.args.val and (self.is_coco or self.is_lvis) and not self.training # run final val
self.names = model.names
self.nc = len(model.names)
self.end2end = getattr(model, "end2end", False)
self.metrics.names = self.names
self.metrics.plot = self.args.plots
self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
@ -96,9 +97,12 @@ class DetectionValidator(BaseValidator):
self.args.conf,
self.args.iou,
labels=self.lb,
nc=self.nc,
multi_label=True,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
end2end=self.end2end,
rotated=self.args.task == "obb",
)
def _prepare_batch(self, si, batch):

View file

@ -27,27 +27,20 @@ class OBBPredictor(DetectionPredictor):
super().__init__(cfg, overrides, _callbacks)
self.args.task = "obb"
def postprocess(self, preds, img, orig_imgs):
"""Post-processes predictions and returns a list of Results objects."""
preds = ops.non_max_suppression(
preds,
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
max_det=self.args.max_det,
nc=len(self.model.names),
classes=self.args.classes,
rotated=True,
)
def construct_result(self, pred, img, orig_img, img_path):
"""
Constructs the result object from the prediction.
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
Args:
pred (torch.Tensor): The predicted bounding boxes, scores, and rotation angles.
img (torch.Tensor): The image after preprocessing.
orig_img (np.ndarray): The original image before preprocessing.
img_path (str): The path to the original image.
results = []
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
# xywh, r, conf, cls
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb))
return results
Returns:
(Results): The result object containing the original image, image path, class names, and oriented bounding boxes.
"""
rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
return Results(orig_img, path=img_path, names=self.model.names, obb=obb)

View file

@ -36,20 +36,6 @@ class OBBValidator(DetectionValidator):
val = self.data.get(self.args.split, "") # validation path
self.is_dota = isinstance(val, str) and "DOTA" in val # is COCO
def postprocess(self, preds):
"""Apply Non-maximum suppression to prediction outputs."""
return ops.non_max_suppression(
preds,
self.args.conf,
self.args.iou,
labels=self.lb,
nc=self.nc,
multi_label=True,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
rotated=True,
)
def _process_batch(self, detections, gt_bboxes, gt_cls):
"""
Perform computation of the correct prediction matrix for a batch of detections and ground truth bounding boxes.

View file

@ -1,6 +1,5 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.engine.results import Results
from ultralytics.models.yolo.detect.predict import DetectionPredictor
from ultralytics.utils import DEFAULT_CFG, LOGGER, ops
@ -30,27 +29,21 @@ class PosePredictor(DetectionPredictor):
"See https://github.com/ultralytics/ultralytics/issues/4031."
)
def postprocess(self, preds, img, orig_imgs):
"""Return detection results for a given input image or list of images."""
preds = ops.non_max_suppression(
preds,
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
max_det=self.args.max_det,
classes=self.args.classes,
nc=len(self.model.names),
)
def construct_result(self, pred, img, orig_img, img_path):
"""
Constructs the result object from the prediction.
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
Args:
pred (torch.Tensor): The predicted bounding boxes, scores, and keypoints.
img (torch.Tensor): The image after preprocessing.
orig_img (np.ndarray): The original image before preprocessing.
img_path (str): The path to the original image.
results = []
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round()
pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
results.append(
Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts)
)
return results
Returns:
(Results): The result object containing the original image, image path, class names, bounding boxes, and keypoints.
"""
result = super().construct_result(pred, img, orig_img, img_path)
pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
result.update(keypoints=pred_kpts)
return result

View file

@ -61,19 +61,6 @@ class PoseValidator(DetectionValidator):
"mAP50-95)",
)
def postprocess(self, preds):
"""Apply non-maximum suppression and return detections with high confidence scores."""
return ops.non_max_suppression(
preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
nc=self.nc,
)
def init_metrics(self, model):
"""Initiate pose estimation metrics for YOLO model."""
super().init_metrics(model)

View file

@ -27,29 +27,48 @@ class SegmentationPredictor(DetectionPredictor):
def postprocess(self, preds, img, orig_imgs):
"""Applies non-max suppression and processes detections for each image in an input batch."""
p = ops.non_max_suppression(
preds[0],
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
max_det=self.args.max_det,
nc=len(self.model.names),
classes=self.args.classes,
)
# tuple if PyTorch model or array if exported
protos = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]
return super().postprocess(preds[0], img, orig_imgs, protos=protos)
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
def construct_results(self, preds, img, orig_imgs, protos):
"""
Constructs a list of result objects from the predictions.
results = []
proto = preds[1][-1] if isinstance(preds[1], tuple) else preds[1] # tuple if PyTorch model or array if exported
for i, (pred, orig_img, img_path) in enumerate(zip(p, orig_imgs, self.batch[0])):
if not len(pred): # save empty boxes
masks = None
elif self.args.retina_masks:
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
else:
masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
return results
Args:
preds (List[torch.Tensor]): List of predicted bounding boxes, scores, and masks.
img (torch.Tensor): The image after preprocessing.
orig_imgs (List[np.ndarray]): List of original images before preprocessing.
protos (List[torch.Tensor]): List of prototype masks.
Returns:
(list): List of result objects containing the original images, image paths, class names, bounding boxes, and masks.
"""
return [
self.construct_result(pred, img, orig_img, img_path, proto)
for pred, orig_img, img_path, proto in zip(preds, orig_imgs, self.batch[0], protos)
]
def construct_result(self, pred, img, orig_img, img_path, proto):
"""
Constructs the result object from the prediction.
Args:
pred (np.ndarray): The predicted bounding boxes, scores, and masks.
img (torch.Tensor): The image after preprocessing.
orig_img (np.ndarray): The original image before preprocessing.
img_path (str): The path to the original image.
proto (torch.Tensor): The prototype masks.
Returns:
(Results): The result object containing the original image, image path, class names, bounding boxes, and masks.
"""
if not len(pred): # save empty boxes
masks = None
elif self.args.retina_masks:
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
masks = ops.process_mask_native(proto, pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
else:
masks = ops.process_mask(proto, pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)

View file

@ -70,16 +70,7 @@ class SegmentationValidator(DetectionValidator):
def postprocess(self, preds):
"""Post-processes YOLO predictions and returns output detections with proto."""
p = ops.non_max_suppression(
preds[0],
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
nc=self.nc,
)
p = super().postprocess(preds[0])
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
return p, proto