ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com> Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>
2024-01-10 03:16:08 +01:00 · 2024-01-10 03:16:08 +01:00 · fe27db2f6e
commit fe27db2f6e
parent e795277391
139 changed files with 6870 additions and 5125 deletions
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@ -30,14 +30,9 @@ class DETRLoss(nn.Module):
        device (torch.device): Device on which tensors are stored.
    """

-    def __init__(self,
-                 nc=80,
-                 loss_gain=None,
-                 aux_loss=True,
-                 use_fl=True,
-                 use_vfl=False,
-                 use_uni_match=False,
-                 uni_match_ind=0):
+    def __init__(
+        self, nc=80, loss_gain=None, aux_loss=True, use_fl=True, use_vfl=False, use_uni_match=False, uni_match_ind=0
+    ):
        """
        DETR loss function.

@ -52,9 +47,9 @@ class DETRLoss(nn.Module):
        super().__init__()

        if loss_gain is None:
-            loss_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'no_object': 0.1, 'mask': 1, 'dice': 1}
+            loss_gain = {"class": 1, "bbox": 5, "giou": 2, "no_object": 0.1, "mask": 1, "dice": 1}
        self.nc = nc
-        self.matcher = HungarianMatcher(cost_gain={'class': 2, 'bbox': 5, 'giou': 2})
+        self.matcher = HungarianMatcher(cost_gain={"class": 2, "bbox": 5, "giou": 2})
        self.loss_gain = loss_gain
        self.aux_loss = aux_loss
        self.fl = FocalLoss() if use_fl else None
@ -64,10 +59,10 @@ class DETRLoss(nn.Module):
        self.uni_match_ind = uni_match_ind
        self.device = None

-    def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
+    def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=""):
        """Computes the classification loss based on predictions, target values, and ground truth scores."""
        # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
-        name_class = f'loss_class{postfix}'
+        name_class = f"loss_class{postfix}"
        bs, nq = pred_scores.shape[:2]
        # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1]  # (bs, num_queries, num_classes)
        one_hot = torch.zeros((bs, nq, self.nc + 1), dtype=torch.int64, device=targets.device)
@ -82,28 +77,28 @@ class DETRLoss(nn.Module):
                loss_cls = self.fl(pred_scores, one_hot.float())
            loss_cls /= max(num_gts, 1) / nq
        else:
-            loss_cls = nn.BCEWithLogitsLoss(reduction='none')(pred_scores, gt_scores).mean(1).sum()  # YOLO CLS loss
+            loss_cls = nn.BCEWithLogitsLoss(reduction="none")(pred_scores, gt_scores).mean(1).sum()  # YOLO CLS loss

-        return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
+        return {name_class: loss_cls.squeeze() * self.loss_gain["class"]}

-    def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
+    def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=""):
        """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
        boxes.
        """
        # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
-        name_bbox = f'loss_bbox{postfix}'
-        name_giou = f'loss_giou{postfix}'
+        name_bbox = f"loss_bbox{postfix}"
+        name_giou = f"loss_giou{postfix}"

        loss = {}
        if len(gt_bboxes) == 0:
-            loss[name_bbox] = torch.tensor(0., device=self.device)
-            loss[name_giou] = torch.tensor(0., device=self.device)
+            loss[name_bbox] = torch.tensor(0.0, device=self.device)
+            loss[name_giou] = torch.tensor(0.0, device=self.device)
            return loss

-        loss[name_bbox] = self.loss_gain['bbox'] * F.l1_loss(pred_bboxes, gt_bboxes, reduction='sum') / len(gt_bboxes)
+        loss[name_bbox] = self.loss_gain["bbox"] * F.l1_loss(pred_bboxes, gt_bboxes, reduction="sum") / len(gt_bboxes)
        loss[name_giou] = 1.0 - bbox_iou(pred_bboxes, gt_bboxes, xywh=True, GIoU=True)
        loss[name_giou] = loss[name_giou].sum() / len(gt_bboxes)
-        loss[name_giou] = self.loss_gain['giou'] * loss[name_giou]
+        loss[name_giou] = self.loss_gain["giou"] * loss[name_giou]
        return {k: v.squeeze() for k, v in loss.items()}

    # This function is for future RT-DETR Segment models
@ -137,50 +132,57 @@ class DETRLoss(nn.Module):
    #     loss = 1 - (numerator + 1) / (denominator + 1)
    #     return loss.sum() / num_gts

-    def _get_loss_aux(self,
-                      pred_bboxes,
-                      pred_scores,
-                      gt_bboxes,
-                      gt_cls,
-                      gt_groups,
-                      match_indices=None,
-                      postfix='',
-                      masks=None,
-                      gt_mask=None):
+    def _get_loss_aux(
+        self,
+        pred_bboxes,
+        pred_scores,
+        gt_bboxes,
+        gt_cls,
+        gt_groups,
+        match_indices=None,
+        postfix="",
+        masks=None,
+        gt_mask=None,
+    ):
        """Get auxiliary losses."""
        # NOTE: loss class, bbox, giou, mask, dice
        loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
        if match_indices is None and self.use_uni_match:
-            match_indices = self.matcher(pred_bboxes[self.uni_match_ind],
-                                         pred_scores[self.uni_match_ind],
-                                         gt_bboxes,
-                                         gt_cls,
-                                         gt_groups,
-                                         masks=masks[self.uni_match_ind] if masks is not None else None,
-                                         gt_mask=gt_mask)
+            match_indices = self.matcher(
+                pred_bboxes[self.uni_match_ind],
+                pred_scores[self.uni_match_ind],
+                gt_bboxes,
+                gt_cls,
+                gt_groups,
+                masks=masks[self.uni_match_ind] if masks is not None else None,
+                gt_mask=gt_mask,
+            )
        for i, (aux_bboxes, aux_scores) in enumerate(zip(pred_bboxes, pred_scores)):
            aux_masks = masks[i] if masks is not None else None
-            loss_ = self._get_loss(aux_bboxes,
-                                   aux_scores,
-                                   gt_bboxes,
-                                   gt_cls,
-                                   gt_groups,
-                                   masks=aux_masks,
-                                   gt_mask=gt_mask,
-                                   postfix=postfix,
-                                   match_indices=match_indices)
-            loss[0] += loss_[f'loss_class{postfix}']
-            loss[1] += loss_[f'loss_bbox{postfix}']
-            loss[2] += loss_[f'loss_giou{postfix}']
+            loss_ = self._get_loss(
+                aux_bboxes,
+                aux_scores,
+                gt_bboxes,
+                gt_cls,
+                gt_groups,
+                masks=aux_masks,
+                gt_mask=gt_mask,
+                postfix=postfix,
+                match_indices=match_indices,
+            )
+            loss[0] += loss_[f"loss_class{postfix}"]
+            loss[1] += loss_[f"loss_bbox{postfix}"]
+            loss[2] += loss_[f"loss_giou{postfix}"]
            # if masks is not None and gt_mask is not None:
            #     loss_ = self._get_loss_mask(aux_masks, gt_mask, match_indices, postfix)
            #     loss[3] += loss_[f'loss_mask{postfix}']
            #     loss[4] += loss_[f'loss_dice{postfix}']

        loss = {
-            f'loss_class_aux{postfix}': loss[0],
-            f'loss_bbox_aux{postfix}': loss[1],
-            f'loss_giou_aux{postfix}': loss[2]}
+            f"loss_class_aux{postfix}": loss[0],
+            f"loss_bbox_aux{postfix}": loss[1],
+            f"loss_giou_aux{postfix}": loss[2],
+        }
        # if masks is not None and gt_mask is not None:
        #     loss[f'loss_mask_aux{postfix}'] = loss[3]
        #     loss[f'loss_dice_aux{postfix}'] = loss[4]
@ -196,33 +198,37 @@ class DETRLoss(nn.Module):

    def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
        """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
-        pred_assigned = torch.cat([
-            t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
-            for t, (I, _) in zip(pred_bboxes, match_indices)])
-        gt_assigned = torch.cat([
-            t[J] if len(J) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
-            for t, (_, J) in zip(gt_bboxes, match_indices)])
+        pred_assigned = torch.cat(
+            [
+                t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
+                for t, (I, _) in zip(pred_bboxes, match_indices)
+            ]
+        )
+        gt_assigned = torch.cat(
+            [
+                t[J] if len(J) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
+                for t, (_, J) in zip(gt_bboxes, match_indices)
+            ]
+        )
        return pred_assigned, gt_assigned

-    def _get_loss(self,
-                  pred_bboxes,
-                  pred_scores,
-                  gt_bboxes,
-                  gt_cls,
-                  gt_groups,
-                  masks=None,
-                  gt_mask=None,
-                  postfix='',
-                  match_indices=None):
+    def _get_loss(
+        self,
+        pred_bboxes,
+        pred_scores,
+        gt_bboxes,
+        gt_cls,
+        gt_groups,
+        masks=None,
+        gt_mask=None,
+        postfix="",
+        match_indices=None,
+    ):
        """Get losses."""
        if match_indices is None:
-            match_indices = self.matcher(pred_bboxes,
-                                         pred_scores,
-                                         gt_bboxes,
-                                         gt_cls,
-                                         gt_groups,
-                                         masks=masks,
-                                         gt_mask=gt_mask)
+            match_indices = self.matcher(
+                pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=masks, gt_mask=gt_mask
+            )

        idx, gt_idx = self._get_index(match_indices)
        pred_bboxes, gt_bboxes = pred_bboxes[idx], gt_bboxes[gt_idx]
@ -242,7 +248,7 @@ class DETRLoss(nn.Module):
        #     loss.update(self._get_loss_mask(masks, gt_mask, match_indices, postfix))
        return loss

-    def forward(self, pred_bboxes, pred_scores, batch, postfix='', **kwargs):
+    def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs):
        """
        Args:
            pred_bboxes (torch.Tensor): [l, b, query, 4]
@ -254,21 +260,19 @@ class DETRLoss(nn.Module):
            postfix (str): postfix of loss name.
        """
        self.device = pred_bboxes.device
-        match_indices = kwargs.get('match_indices', None)
-        gt_cls, gt_bboxes, gt_groups = batch['cls'], batch['bboxes'], batch['gt_groups']
+        match_indices = kwargs.get("match_indices", None)
+        gt_cls, gt_bboxes, gt_groups = batch["cls"], batch["bboxes"], batch["gt_groups"]

-        total_loss = self._get_loss(pred_bboxes[-1],
-                                    pred_scores[-1],
-                                    gt_bboxes,
-                                    gt_cls,
-                                    gt_groups,
-                                    postfix=postfix,
-                                    match_indices=match_indices)
+        total_loss = self._get_loss(
+            pred_bboxes[-1], pred_scores[-1], gt_bboxes, gt_cls, gt_groups, postfix=postfix, match_indices=match_indices
+        )

        if self.aux_loss:
            total_loss.update(
-                self._get_loss_aux(pred_bboxes[:-1], pred_scores[:-1], gt_bboxes, gt_cls, gt_groups, match_indices,
-                                   postfix))
+                self._get_loss_aux(
+                    pred_bboxes[:-1], pred_scores[:-1], gt_bboxes, gt_cls, gt_groups, match_indices, postfix
+                )
+            )

        return total_loss

@ -300,18 +304,18 @@ class RTDETRDetectionLoss(DETRLoss):

        # Check for denoising metadata to compute denoising training loss
        if dn_meta is not None:
-            dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
-            assert len(batch['gt_groups']) == len(dn_pos_idx)
+            dn_pos_idx, dn_num_group = dn_meta["dn_pos_idx"], dn_meta["dn_num_group"]
+            assert len(batch["gt_groups"]) == len(dn_pos_idx)

            # Get the match indices for denoising
-            match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
+            match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch["gt_groups"])

            # Compute the denoising training loss
-            dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
+            dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix="_dn", match_indices=match_indices)
            total_loss.update(dn_loss)
        else:
            # If no denoising metadata is provided, set denoising loss to zero
-            total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
+            total_loss.update({f"{k}_dn": torch.tensor(0.0, device=self.device) for k in total_loss.keys()})

        return total_loss

@ -334,8 +338,8 @@ class RTDETRDetectionLoss(DETRLoss):
            if num_gt > 0:
                gt_idx = torch.arange(end=num_gt, dtype=torch.long) + idx_groups[i]
                gt_idx = gt_idx.repeat(dn_num_group)
-                assert len(dn_pos_idx[i]) == len(gt_idx), 'Expected the same length, '
-                f'but got {len(dn_pos_idx[i])} and {len(gt_idx)} respectively.'
+                assert len(dn_pos_idx[i]) == len(gt_idx), "Expected the same length, "
+                f"but got {len(dn_pos_idx[i])} and {len(gt_idx)} respectively."
                dn_match_indices.append((dn_pos_idx[i], gt_idx))
            else:
                dn_match_indices.append((torch.zeros([0], dtype=torch.long), torch.zeros([0], dtype=torch.long)))
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@ -37,7 +37,7 @@ class HungarianMatcher(nn.Module):
        """
        super().__init__()
        if cost_gain is None:
-            cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
+            cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}
        self.cost_gain = cost_gain
        self.use_fl = use_fl
        self.with_mask = with_mask
@ -86,7 +86,7 @@ class HungarianMatcher(nn.Module):
        # Compute the classification cost
        pred_scores = pred_scores[:, gt_cls]
        if self.use_fl:
-            neg_cost_class = (1 - self.alpha) * (pred_scores ** self.gamma) * (-(1 - pred_scores + 1e-8).log())
+            neg_cost_class = (1 - self.alpha) * (pred_scores**self.gamma) * (-(1 - pred_scores + 1e-8).log())
            pos_cost_class = self.alpha * ((1 - pred_scores) ** self.gamma) * (-(pred_scores + 1e-8).log())
            cost_class = pos_cost_class - neg_cost_class
        else:
@ -99,9 +99,11 @@ class HungarianMatcher(nn.Module):
        cost_giou = 1.0 - bbox_iou(pred_bboxes.unsqueeze(1), gt_bboxes.unsqueeze(0), xywh=True, GIoU=True).squeeze(-1)

        # Final cost matrix
-        C = self.cost_gain['class'] * cost_class + \
-            self.cost_gain['bbox'] * cost_bbox + \
-            self.cost_gain['giou'] * cost_giou
+        C = (
+            self.cost_gain["class"] * cost_class
+            + self.cost_gain["bbox"] * cost_bbox
+            + self.cost_gain["giou"] * cost_giou
+        )
        # Compute the mask cost and dice cost
        if self.with_mask:
            C += self._cost_mask(bs, gt_groups, masks, gt_mask)
@ -111,10 +113,11 @@ class HungarianMatcher(nn.Module):

        C = C.view(bs, nq, -1).cpu()
        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(gt_groups, -1))]
-        gt_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
-        # (idx for queries, idx for gt)
-        return [(torch.tensor(i, dtype=torch.long), torch.tensor(j, dtype=torch.long) + gt_groups[k])
-                for k, (i, j) in enumerate(indices)]
+        gt_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)  # (idx for queries, idx for gt)
+        return [
+            (torch.tensor(i, dtype=torch.long), torch.tensor(j, dtype=torch.long) + gt_groups[k])
+            for k, (i, j) in enumerate(indices)
+        ]

    # This function is for future RT-DETR Segment models
    # def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None):
@ -147,14 +150,9 @@ class HungarianMatcher(nn.Module):
    #     return C


-def get_cdn_group(batch,
-                  num_classes,
-                  num_queries,
-                  class_embed,
-                  num_dn=100,
-                  cls_noise_ratio=0.5,
-                  box_noise_scale=1.0,
-                  training=False):
+def get_cdn_group(
+    batch, num_classes, num_queries, class_embed, num_dn=100, cls_noise_ratio=0.5, box_noise_scale=1.0, training=False
+):
    """
    Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
    and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
@ -180,7 +178,7 @@ def get_cdn_group(batch,

    if (not training) or num_dn <= 0:
        return None, None, None, None
-    gt_groups = batch['gt_groups']
+    gt_groups = batch["gt_groups"]
    total_num = sum(gt_groups)
    max_nums = max(gt_groups)
    if max_nums == 0:
@ -190,9 +188,9 @@ def get_cdn_group(batch,
    num_group = 1 if num_group == 0 else num_group
    # Pad gt to max_num of a batch
    bs = len(gt_groups)
-    gt_cls = batch['cls']  # (bs*num, )
-    gt_bbox = batch['bboxes']  # bs*num, 4
-    b_idx = batch['batch_idx']
+    gt_cls = batch["cls"]  # (bs*num, )
+    gt_bbox = batch["bboxes"]  # bs*num, 4
+    b_idx = batch["batch_idx"]

    # Each group has positive and negative queries.
    dn_cls = gt_cls.repeat(2 * num_group)  # (2*num_group*bs*num, )
@ -245,16 +243,21 @@ def get_cdn_group(batch,
    # Reconstruct cannot see each other
    for i in range(num_group):
        if i == 0:
-            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
+            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), max_nums * 2 * (i + 1) : num_dn] = True
        if i == num_group - 1:
-            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), :max_nums * i * 2] = True
+            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), : max_nums * i * 2] = True
        else:
-            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
-            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), :max_nums * 2 * i] = True
+            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), max_nums * 2 * (i + 1) : num_dn] = True
+            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), : max_nums * 2 * i] = True
    dn_meta = {
-        'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
-        'dn_num_group': num_group,
-        'dn_num_split': [num_dn, num_queries]}
+        "dn_pos_idx": [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
+        "dn_num_group": num_group,
+        "dn_num_split": [num_dn, num_queries],
+    }

-    return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
-        class_embed.device), dn_meta
+    return (
+        padding_cls.to(class_embed.device),
+        padding_bbox.to(class_embed.device),
+        attn_mask.to(class_embed.device),
+        dn_meta,
+    )