ultralytics 8.3.67 NMS Export for Detect, Segment, Pose and OBB YOLO models (#18484)

Signed-off-by: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com> Co-authored-by: Laughing-q <1185102784@qq.com> Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
2025-01-24 18:00:36 +08:00 · 2025-01-24 18:00:36 +08:00 · 9181ff62f5
commit 9181ff62f5
parent 0e48a00303
17 changed files with 320 additions and 208 deletions
--- a/ultralytics/models/nas/val.py
+++ b/ultralytics/models/nas/val.py
@ -38,13 +38,7 @@ class NASValidator(DetectionValidator):
        """Apply Non-maximum suppression to prediction outputs."""
        boxes = ops.xyxy2xywh(preds_in[0][0])
        preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1)
-        return ops.non_max_suppression(
+        return super().postprocess(
            preds,
-            self.args.conf,
-            self.args.iou,
-            labels=self.lb,
-            multi_label=False,
-            agnostic=self.args.single_cls or self.args.agnostic_nms,
-            max_det=self.args.max_det,
            max_time_img=0.5,
        )
--- a/ultralytics/models/yolo/detect/predict.py
+++ b/ultralytics/models/yolo/detect/predict.py
@ -20,22 +20,54 @@ class DetectionPredictor(BasePredictor):
        ```
    """

-    def postprocess(self, preds, img, orig_imgs):
+    def postprocess(self, preds, img, orig_imgs, **kwargs):
        """Post-processes predictions and returns a list of Results objects."""
        preds = ops.non_max_suppression(
            preds,
            self.args.conf,
            self.args.iou,
-            agnostic=self.args.agnostic_nms,
+            self.args.classes,
+            self.args.agnostic_nms,
            max_det=self.args.max_det,
-            classes=self.args.classes,
+            nc=len(self.model.names),
+            end2end=getattr(self.model, "end2end", False),
+            rotated=self.args.task == "obb",
        )

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

-        results = []
-        for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
-            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
-        return results
+        return self.construct_results(preds, img, orig_imgs, **kwargs)
+
+    def construct_results(self, preds, img, orig_imgs):
+        """
+        Constructs a list of result objects from the predictions.
+
+        Args:
+            preds (List[torch.Tensor]): List of predicted bounding boxes and scores.
+            img (torch.Tensor): The image after preprocessing.
+            orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+
+        Returns:
+            (list): List of result objects containing the original images, image paths, class names, and bounding boxes.
+        """
+        return [
+            self.construct_result(pred, img, orig_img, img_path)
+            for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
+        ]
+
+    def construct_result(self, pred, img, orig_img, img_path):
+        """
+        Constructs the result object from the prediction.
+
+        Args:
+            pred (torch.Tensor): The predicted bounding boxes and scores.
+            img (torch.Tensor): The image after preprocessing.
+            orig_img (np.ndarray): The original image before preprocessing.
+            img_path (str): The path to the original image.
+
+        Returns:
+            (Results): The result object containing the original image, image path, class names, and bounding boxes.
+        """
+        pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+        return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6])
--- a/ultralytics/models/yolo/detect/val.py
+++ b/ultralytics/models/yolo/detect/val.py
@ -78,6 +78,7 @@ class DetectionValidator(BaseValidator):
        self.args.save_json |= self.args.val and (self.is_coco or self.is_lvis) and not self.training  # run final val
        self.names = model.names
        self.nc = len(model.names)
+        self.end2end = getattr(model, "end2end", False)
        self.metrics.names = self.names
        self.metrics.plot = self.args.plots
        self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
@ -96,9 +97,12 @@ class DetectionValidator(BaseValidator):
            self.args.conf,
            self.args.iou,
            labels=self.lb,
+            nc=self.nc,
            multi_label=True,
            agnostic=self.args.single_cls or self.args.agnostic_nms,
            max_det=self.args.max_det,
+            end2end=self.end2end,
+            rotated=self.args.task == "obb",
        )

    def _prepare_batch(self, si, batch):
--- a/ultralytics/models/yolo/obb/predict.py
+++ b/ultralytics/models/yolo/obb/predict.py
@ -27,27 +27,20 @@ class OBBPredictor(DetectionPredictor):
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = "obb"

-    def postprocess(self, preds, img, orig_imgs):
-        """Post-processes predictions and returns a list of Results objects."""
-        preds = ops.non_max_suppression(
-            preds,
-            self.args.conf,
-            self.args.iou,
-            agnostic=self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            nc=len(self.model.names),
-            classes=self.args.classes,
-            rotated=True,
-        )
+    def construct_result(self, pred, img, orig_img, img_path):
+        """
+        Constructs the result object from the prediction.

-        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
-            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
+        Args:
+            pred (torch.Tensor): The predicted bounding boxes, scores, and rotation angles.
+            img (torch.Tensor): The image after preprocessing.
+            orig_img (np.ndarray): The original image before preprocessing.
+            img_path (str): The path to the original image.

-        results = []
-        for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
-            rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
-            rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
-            # xywh, r, conf, cls
-            obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
-            results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb))
-        return results
+        Returns:
+            (Results): The result object containing the original image, image path, class names, and oriented bounding boxes.
+        """
+        rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
+        rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
+        obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
+        return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
--- a/ultralytics/models/yolo/obb/val.py
+++ b/ultralytics/models/yolo/obb/val.py
@ -36,20 +36,6 @@ class OBBValidator(DetectionValidator):
        val = self.data.get(self.args.split, "")  # validation path
        self.is_dota = isinstance(val, str) and "DOTA" in val  # is COCO

-    def postprocess(self, preds):
-        """Apply Non-maximum suppression to prediction outputs."""
-        return ops.non_max_suppression(
-            preds,
-            self.args.conf,
-            self.args.iou,
-            labels=self.lb,
-            nc=self.nc,
-            multi_label=True,
-            agnostic=self.args.single_cls or self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            rotated=True,
-        )
-
    def _process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Perform computation of the correct prediction matrix for a batch of detections and ground truth bounding boxes.
--- a/ultralytics/models/yolo/pose/predict.py
+++ b/ultralytics/models/yolo/pose/predict.py
@ -1,6 +1,5 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from ultralytics.engine.results import Results
 from ultralytics.models.yolo.detect.predict import DetectionPredictor
 from ultralytics.utils import DEFAULT_CFG, LOGGER, ops

@ -30,27 +29,21 @@ class PosePredictor(DetectionPredictor):
                "See https://github.com/ultralytics/ultralytics/issues/4031."
            )

-    def postprocess(self, preds, img, orig_imgs):
-        """Return detection results for a given input image or list of images."""
-        preds = ops.non_max_suppression(
-            preds,
-            self.args.conf,
-            self.args.iou,
-            agnostic=self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            classes=self.args.classes,
-            nc=len(self.model.names),
-        )
+    def construct_result(self, pred, img, orig_img, img_path):
+        """
+        Constructs the result object from the prediction.

-        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
-            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
+        Args:
+            pred (torch.Tensor): The predicted bounding boxes, scores, and keypoints.
+            img (torch.Tensor): The image after preprocessing.
+            orig_img (np.ndarray): The original image before preprocessing.
+            img_path (str): The path to the original image.

-        results = []
-        for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
-            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round()
-            pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
-            pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
-            results.append(
-                Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts)
-            )
-        return results
+        Returns:
+            (Results): The result object containing the original image, image path, class names, bounding boxes, and keypoints.
+        """
+        result = super().construct_result(pred, img, orig_img, img_path)
+        pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
+        pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
+        result.update(keypoints=pred_kpts)
+        return result
--- a/ultralytics/models/yolo/pose/val.py
+++ b/ultralytics/models/yolo/pose/val.py
@ -61,19 +61,6 @@ class PoseValidator(DetectionValidator):
            "mAP50-95)",
        )

-    def postprocess(self, preds):
-        """Apply non-maximum suppression and return detections with high confidence scores."""
-        return ops.non_max_suppression(
-            preds,
-            self.args.conf,
-            self.args.iou,
-            labels=self.lb,
-            multi_label=True,
-            agnostic=self.args.single_cls or self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            nc=self.nc,
-        )
-
    def init_metrics(self, model):
        """Initiate pose estimation metrics for YOLO model."""
        super().init_metrics(model)
--- a/ultralytics/models/yolo/segment/predict.py
+++ b/ultralytics/models/yolo/segment/predict.py
@ -27,29 +27,48 @@ class SegmentationPredictor(DetectionPredictor):

    def postprocess(self, preds, img, orig_imgs):
        """Applies non-max suppression and processes detections for each image in an input batch."""
-        p = ops.non_max_suppression(
-            preds[0],
-            self.args.conf,
-            self.args.iou,
-            agnostic=self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            nc=len(self.model.names),
-            classes=self.args.classes,
-        )
+        # tuple if PyTorch model or array if exported
+        protos = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]
+        return super().postprocess(preds[0], img, orig_imgs, protos=protos)

-        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
-            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
+    def construct_results(self, preds, img, orig_imgs, protos):
+        """
+        Constructs a list of result objects from the predictions.

-        results = []
-        proto = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]  # tuple if PyTorch model or array if exported
-        for i, (pred, orig_img, img_path) in enumerate(zip(p, orig_imgs, self.batch[0])):
-            if not len(pred):  # save empty boxes
-                masks = None
-            elif self.args.retina_masks:
-                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-                masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
-            else:
-                masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
-                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
-        return results
+        Args:
+            preds (List[torch.Tensor]): List of predicted bounding boxes, scores, and masks.
+            img (torch.Tensor): The image after preprocessing.
+            orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+            protos (List[torch.Tensor]): List of prototype masks.
+
+        Returns:
+            (list): List of result objects containing the original images, image paths, class names, bounding boxes, and masks.
+        """
+        return [
+            self.construct_result(pred, img, orig_img, img_path, proto)
+            for pred, orig_img, img_path, proto in zip(preds, orig_imgs, self.batch[0], protos)
+        ]
+
+    def construct_result(self, pred, img, orig_img, img_path, proto):
+        """
+        Constructs the result object from the prediction.
+
+        Args:
+            pred (np.ndarray): The predicted bounding boxes, scores, and masks.
+            img (torch.Tensor): The image after preprocessing.
+            orig_img (np.ndarray): The original image before preprocessing.
+            img_path (str): The path to the original image.
+            proto (torch.Tensor): The prototype masks.
+
+        Returns:
+            (Results): The result object containing the original image, image path, class names, bounding boxes, and masks.
+        """
+        if not len(pred):  # save empty boxes
+            masks = None
+        elif self.args.retina_masks:
+            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+            masks = ops.process_mask_native(proto, pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
+        else:
+            masks = ops.process_mask(proto, pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
+            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+        return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@ -70,16 +70,7 @@ class SegmentationValidator(DetectionValidator):

    def postprocess(self, preds):
        """Post-processes YOLO predictions and returns output detections with proto."""
-        p = ops.non_max_suppression(
-            preds[0],
-            self.args.conf,
-            self.args.iou,
-            labels=self.lb,
-            multi_label=True,
-            agnostic=self.args.single_cls or self.args.agnostic_nms,
-            max_det=self.args.max_det,
-            nc=self.nc,
-        )
+        p = super().postprocess(preds[0])
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        return p, proto