diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index a810c0e2..99cdab7c 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -202,13 +202,18 @@ class DetectionValidator(BaseValidator): Return correct prediction matrix. Args: - detections (torch.Tensor): Tensor of shape [N, 6] representing detections. - Each detection is of the format: x1, y1, x2, y2, conf, class. - labels (torch.Tensor): Tensor of shape [M, 5] representing labels. - Each label is of the format: class, x1, y1, x2, y2. + detections (torch.Tensor): Tensor of shape (N, 6) representing detections where each detection is + (x1, y1, x2, y2, conf, class). + gt_bboxes (torch.Tensor): Tensor of shape (M, 4) representing ground-truth bounding box coordinates. Each + bounding box is of the format: (x1, y1, x2, y2). + gt_cls (torch.Tensor): Tensor of shape (M,) representing target class indices. Returns: - (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels. + (torch.Tensor): Correct prediction matrix of shape (N, 10) for 10 IoU levels. + + Note: + The returned matrix is not a final metric value. Instead, it is an intermediate representation used for + evaluating predictions against ground truth. """ iou = box_iou(gt_bboxes, detections[:, :4]) return self.match_predictions(detections[:, 5], gt_cls, iou) diff --git a/ultralytics/models/yolo/obb/val.py b/ultralytics/models/yolo/obb/val.py index 08c056ce..0a279f51 100644 --- a/ultralytics/models/yolo/obb/val.py +++ b/ultralytics/models/yolo/obb/val.py @@ -52,17 +52,29 @@ class OBBValidator(DetectionValidator): def _process_batch(self, detections, gt_bboxes, gt_cls): """ - Return correct prediction matrix. + Perform computation of the correct prediction matrix for a batch of detections and ground truth bounding boxes. Args: - detections (torch.Tensor): Tensor of shape [N, 7] representing detections. 
- Each detection is of the format: x1, y1, x2, y2, conf, class, angle. - gt_bboxes (torch.Tensor): Tensor of shape [M, 5] representing rotated boxes. - Each box is of the format: x1, y1, x2, y2, angle. - labels (torch.Tensor): Tensor of shape [M] representing labels. + detections (torch.Tensor): A tensor of shape (N, 7) representing the detected bounding boxes and associated + data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle). + gt_bboxes (torch.Tensor): A tensor of shape (M, 5) representing the ground truth bounding boxes. Each box is + represented as (x1, y1, x2, y2, angle). + gt_cls (torch.Tensor): A tensor of shape (M,) representing class labels for the ground truth bounding boxes. Returns: - (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels. + (torch.Tensor): The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over + Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth. + + Example: + ```python + detections = torch.rand(100, 7) # 100 sample detections + gt_bboxes = torch.rand(50, 5) # 50 sample ground truth boxes + gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels + correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls) # validator is an OBBValidator instance + ``` + + Note: + This method relies on `batch_probiou` to calculate IoU between detections and ground truth bounding boxes. """ iou = batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1)) return self.match_predictions(detections[:, 5], gt_cls, iou) diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py index 3516b8a2..155d57ff 100644 --- a/ultralytics/models/yolo/pose/val.py +++ b/ultralytics/models/yolo/pose/val.py @@ -152,19 +152,34 @@ class PoseValidator(DetectionValidator): def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=None): """ - Return correct prediction matrix. 
+ Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth. Args: - detections (torch.Tensor): Tensor of shape [N, 6] representing detections. - Each detection is of the format: x1, y1, x2, y2, conf, class. - labels (torch.Tensor): Tensor of shape [M, 5] representing labels. - Each label is of the format: class, x1, y1, x2, y2. - pred_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing predicted keypoints. - 51 corresponds to 17 keypoints each with 3 values. - gt_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing ground truth keypoints. + detections (torch.Tensor): Tensor with shape (N, 6) representing detection boxes and scores, where each + detection is of the format (x1, y1, x2, y2, conf, class). + gt_bboxes (torch.Tensor): Tensor with shape (M, 4) representing ground truth bounding boxes, where each + box is of the format (x1, y1, x2, y2). + gt_cls (torch.Tensor): Tensor with shape (M,) representing ground truth class indices. + pred_kpts (torch.Tensor | None): Optional tensor with shape (N, 51) representing predicted keypoints, where + 51 corresponds to 17 keypoints each having 3 values. + gt_kpts (torch.Tensor | None): Optional tensor with shape (M, 51) representing ground truth keypoints. Returns: - torch.Tensor: Correct prediction matrix of shape [N, 10] for 10 IoU levels. + torch.Tensor: A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels, + where N is the number of detections. 
+ + Example: + ```python + detections = torch.rand(100, 6) # 100 predictions: (x1, y1, x2, y2, conf, class) + gt_bboxes = torch.rand(50, 4) # 50 ground truth boxes: (x1, y1, x2, y2) + gt_cls = torch.randint(0, 2, (50,)) # 50 ground truth class indices + pred_kpts = torch.rand(100, 51) # 100 predicted keypoints + gt_kpts = torch.rand(50, 51) # 50 ground truth keypoints + correct_preds = validator._process_batch(detections, gt_bboxes, gt_cls, pred_kpts, gt_kpts) # validator is a PoseValidator instance + ``` + + Note: + The `0.53` scale factor used in the area computation is taken from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384. """ if pred_kpts is not None and gt_kpts is not None: # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384 diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py index 79acb223..35523778 100644 --- a/ultralytics/models/yolo/segment/val.py +++ b/ultralytics/models/yolo/segment/val.py @@ -164,14 +164,34 @@ class SegmentationValidator(DetectionValidator): def _process_batch(self, detections, gt_bboxes, gt_cls, pred_masks=None, gt_masks=None, overlap=False, masks=False): """ - Return correct prediction matrix. + Compute correct prediction matrix for a batch based on bounding boxes and optional masks. Args: - detections (array[N, 6]), x1, y1, x2, y2, conf, class - labels (array[M, 5]), class, x1, y1, x2, y2 + detections (torch.Tensor): Tensor of shape (N, 6) representing detected bounding boxes and + associated confidence scores and class indices. Each row is of the format [x1, y1, x2, y2, conf, class]. + gt_bboxes (torch.Tensor): Tensor of shape (M, 4) representing ground truth bounding box coordinates. + Each row is of the format [x1, y1, x2, y2]. + gt_cls (torch.Tensor): Tensor of shape (M,) representing ground truth class indices. + pred_masks (torch.Tensor | None): Tensor representing predicted masks, if available. The shape should + match the ground truth masks. 
+ gt_masks (torch.Tensor | None): Tensor of shape (M, H, W) representing ground truth masks, if available. + overlap (bool): Flag indicating if overlapping masks should be considered. + masks (bool): Flag indicating if the batch contains mask data. Returns: - correct (array[N, 10]), for 10 IoU levels + (torch.Tensor): A correct prediction matrix of shape (N, 10), where 10 represents different IoU levels. + + Note: + - If `masks` is True, the function computes IoU between predicted and ground truth masks. + - If `overlap` is True and `masks` is True, overlapping masks are taken into account when computing IoU. + + Example: + ```python + detections = torch.tensor([[25, 30, 200, 300, 0.8, 1], [50, 60, 180, 290, 0.75, 0]]) + gt_bboxes = torch.tensor([[24, 29, 199, 299], [55, 65, 185, 295]]) + gt_cls = torch.tensor([1, 0]) + correct_preds = validator._process_batch(detections, gt_bboxes, gt_cls) + ``` """ if masks: if overlap: