ultralytics 8.0.79 expand Docs reference section (#2053)

Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Fri3dChicken <87434761+AmoghDhaliwal@users.noreply.github.com>
2023-04-16 12:28:12 +02:00 · 2023-04-16 12:28:12 +02:00 · 31db8ed163
commit 31db8ed163
parent 47bd8b433b
106 changed files with 2570 additions and 529 deletions
--- a/ultralytics/yolo/utils/tal.py
+++ b/ultralytics/yolo/utils/tal.py
@ -54,6 +54,19 @@ def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):


 class TaskAlignedAssigner(nn.Module):
+    """
+    A task-aligned assigner for object detection.
+
+    This class assigns ground-truth (gt) objects to anchors based on the task-aligned metric,
+    which combines both classification and localization information.
+
+    Attributes:
+        topk (int): The number of top candidates to consider.
+        num_classes (int): The number of object classes.
+        alpha (float): The alpha parameter for the classification component of the task-aligned metric.
+        beta (float): The beta parameter for the localization component of the task-aligned metric.
+        eps (float): A small value to prevent division by zero.
+    """

    def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
        super().__init__()
@ -66,8 +79,9 @@ class TaskAlignedAssigner(nn.Module):

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
-        """This code referenced to
-           https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py
+        """
+        Compute the task-aligned assignment.
+        Reference: https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py

        Args:
            pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
@ -76,11 +90,13 @@ class TaskAlignedAssigner(nn.Module):
            gt_labels (Tensor): shape(bs, n_max_boxes, 1)
            gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
            mask_gt (Tensor): shape(bs, n_max_boxes, 1)
+
        Returns:
            target_labels (Tensor): shape(bs, num_total_anchors)
            target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            fg_mask (Tensor): shape(bs, num_total_anchors)
+            target_gt_idx (Tensor): shape(bs, num_total_anchors)
        """
        self.bs = pd_scores.size(0)
        self.n_max_boxes = gt_bboxes.size(1)
@ -142,9 +158,19 @@ class TaskAlignedAssigner(nn.Module):

    def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
        """
+        Select the top-k candidates based on the given metrics.
+
        Args:
-            metrics: (b, max_num_obj, h*w).
-            topk_mask: (b, max_num_obj, topk) or None
+            metrics (Tensor): A tensor of shape (b, max_num_obj, h*w), where b is the batch size,
+                              max_num_obj is the maximum number of objects, and h*w represents the
+                              total number of anchor points.
+            largest (bool): If True, select the largest values; otherwise, select the smallest values.
+            topk_mask (Tensor): An optional boolean tensor of shape (b, max_num_obj, topk), where
+                                topk is the number of top candidates to consider. If not provided,
+                                the top-k values are automatically computed based on the given metrics.
+
+        Returns:
+            (Tensor): A tensor of shape (b, max_num_obj, h*w) containing the selected top-k candidates.
        """

        num_anchors = metrics.shape[-1]  # h*w
@ -165,22 +191,38 @@ class TaskAlignedAssigner(nn.Module):

    def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
        """
+        Compute target labels, target bounding boxes, and target scores for the positive anchor points.
+
        Args:
-            gt_labels: (b, max_num_obj, 1)
-            gt_bboxes: (b, max_num_obj, 4)
-            target_gt_idx: (b, h*w)
-            fg_mask: (b, h*w)
+            gt_labels (Tensor): Ground truth labels of shape (b, max_num_obj, 1), where b is the
+                                batch size and max_num_obj is the maximum number of objects.
+            gt_bboxes (Tensor): Ground truth bounding boxes of shape (b, max_num_obj, 4).
+            target_gt_idx (Tensor): Indices of the assigned ground truth objects for positive
+                                    anchor points, with shape (b, h*w), where h*w is the total
+                                    number of anchor points.
+            fg_mask (Tensor): A boolean tensor of shape (b, h*w) indicating the positive
+                              (foreground) anchor points.
+
+        Returns:
+            (Tuple[Tensor, Tensor, Tensor]): A tuple containing the following tensors:
+                - target_labels (Tensor): Shape (b, h*w), containing the target labels for
+                                          positive anchor points.
+                - target_bboxes (Tensor): Shape (b, h*w, 4), containing the target bounding boxes
+                                          for positive anchor points.
+                - target_scores (Tensor): Shape (b, h*w, num_classes), containing the target scores
+                                          for positive anchor points, where num_classes is the number
+                                          of object classes.
        """

-        # assigned target labels, (b, 1)
+        # Assigned target labels; batch_ind is (b, 1) and offsets indices into the flattened gt tensors
        batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes  # (b, h*w)
        target_labels = gt_labels.long().flatten()[target_gt_idx]  # (b, h*w)

-        # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w)
+        # Assigned target boxes, (b, max_num_obj, 4) -> (b, h*w, 4)
        target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

-        # assigned target scores
+        # Assigned target scores
        target_labels.clamp(0)
        target_scores = F.one_hot(target_labels, self.num_classes)  # (b, h*w, 80)
        fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)  # (b, h*w, 80)