ultralytics 8.0.167 Tuner updates and HUB Pose and Classify fixes (#4656)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-08-31 01:38:42 +02:00 · 2023-08-31 01:38:42 +02:00 · d2cf7acce0
commit d2cf7acce0
parent 8596ee241f
21 changed files with 174 additions and 144 deletions
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -84,35 +84,36 @@ class DETRLoss(nn.Module):
        loss[name_giou] = self.loss_gain['giou'] * loss[name_giou]
        return {k: v.squeeze() for k, v in loss.items()}

-    def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''):
-        # masks: [b, query, h, w], gt_mask: list[[n, H, W]]
-        name_mask = f'loss_mask{postfix}'
-        name_dice = f'loss_dice{postfix}'
+    # This function is for future RT-DETR Segment models
+    # def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''):
+    #     # masks: [b, query, h, w], gt_mask: list[[n, H, W]]
+    #     name_mask = f'loss_mask{postfix}'
+    #     name_dice = f'loss_dice{postfix}'
+    #
+    #     loss = {}
+    #     if sum(len(a) for a in gt_mask) == 0:
+    #         loss[name_mask] = torch.tensor(0., device=self.device)
+    #         loss[name_dice] = torch.tensor(0., device=self.device)
+    #         return loss
+    #
+    #     num_gts = len(gt_mask)
+    #     src_masks, target_masks = self._get_assigned_bboxes(masks, gt_mask, match_indices)
+    #     src_masks = F.interpolate(src_masks.unsqueeze(0), size=target_masks.shape[-2:], mode='bilinear')[0]
+    #     # TODO: torch does not have `sigmoid_focal_loss`, but it's not urgent since we don't use mask branch for now.
+    #     loss[name_mask] = self.loss_gain['mask'] * F.sigmoid_focal_loss(src_masks, target_masks,
+    #                                                                     torch.tensor([num_gts], dtype=torch.float32))
+    #     loss[name_dice] = self.loss_gain['dice'] * self._dice_loss(src_masks, target_masks, num_gts)
+    #     return loss

-        loss = {}
-        if sum(len(a) for a in gt_mask) == 0:
-            loss[name_mask] = torch.tensor(0., device=self.device)
-            loss[name_dice] = torch.tensor(0., device=self.device)
-            return loss
-
-        num_gts = len(gt_mask)
-        src_masks, target_masks = self._get_assigned_bboxes(masks, gt_mask, match_indices)
-        src_masks = F.interpolate(src_masks.unsqueeze(0), size=target_masks.shape[-2:], mode='bilinear')[0]
-        # TODO: torch does not have `sigmoid_focal_loss`, but it's not urgent since we don't use mask branch for now.
-        loss[name_mask] = self.loss_gain['mask'] * F.sigmoid_focal_loss(src_masks, target_masks,
-                                                                        torch.tensor([num_gts], dtype=torch.float32))
-        loss[name_dice] = self.loss_gain['dice'] * self._dice_loss(src_masks, target_masks, num_gts)
-        return loss
-
-    @staticmethod
-    def _dice_loss(inputs, targets, num_gts):
-        inputs = F.sigmoid(inputs)
-        inputs = inputs.flatten(1)
-        targets = targets.flatten(1)
-        numerator = 2 * (inputs * targets).sum(1)
-        denominator = inputs.sum(-1) + targets.sum(-1)
-        loss = 1 - (numerator + 1) / (denominator + 1)
-        return loss.sum() / num_gts
+    # This function is for future RT-DETR Segment models
+    # @staticmethod
+    # def _dice_loss(inputs, targets, num_gts):
+    #     inputs = F.sigmoid(inputs).flatten(1)
+    #     targets = targets.flatten(1)
+    #     numerator = 2 * (inputs * targets).sum(1)
+    #     denominator = inputs.sum(-1) + targets.sum(-1)
+    #     loss = 1 - (numerator + 1) / (denominator + 1)
+    #     return loss.sum() / num_gts

    def _get_loss_aux(self,
                      pred_bboxes,
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -110,34 +110,35 @@ class HungarianMatcher(nn.Module):
        return [(torch.tensor(i, dtype=torch.long), torch.tensor(j, dtype=torch.long) + gt_groups[k])
                for k, (i, j) in enumerate(indices)]

-    def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None):
-        assert masks is not None and gt_mask is not None, 'Make sure the input has `mask` and `gt_mask`'
-        # all masks share the same set of points for efficient matching
-        sample_points = torch.rand([bs, 1, self.num_sample_points, 2])
-        sample_points = 2.0 * sample_points - 1.0
-
-        out_mask = F.grid_sample(masks.detach(), sample_points, align_corners=False).squeeze(-2)
-        out_mask = out_mask.flatten(0, 1)
-
-        tgt_mask = torch.cat(gt_mask).unsqueeze(1)
-        sample_points = torch.cat([a.repeat(b, 1, 1, 1) for a, b in zip(sample_points, num_gts) if b > 0])
-        tgt_mask = F.grid_sample(tgt_mask, sample_points, align_corners=False).squeeze([1, 2])
-
-        with torch.cuda.amp.autocast(False):
-            # binary cross entropy cost
-            pos_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction='none')
-            neg_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction='none')
-            cost_mask = torch.matmul(pos_cost_mask, tgt_mask.T) + torch.matmul(neg_cost_mask, 1 - tgt_mask.T)
-            cost_mask /= self.num_sample_points
-
-            # dice cost
-            out_mask = F.sigmoid(out_mask)
-            numerator = 2 * torch.matmul(out_mask, tgt_mask.T)
-            denominator = out_mask.sum(-1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0)
-            cost_dice = 1 - (numerator + 1) / (denominator + 1)
-
-            C = self.cost_gain['mask'] * cost_mask + self.cost_gain['dice'] * cost_dice
-        return C
+    # This function is for future RT-DETR Segment models
+    # def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None):
+    #     assert masks is not None and gt_mask is not None, 'Make sure the input has `mask` and `gt_mask`'
+    #     # all masks share the same set of points for efficient matching
+    #     sample_points = torch.rand([bs, 1, self.num_sample_points, 2])
+    #     sample_points = 2.0 * sample_points - 1.0
+    #
+    #     out_mask = F.grid_sample(masks.detach(), sample_points, align_corners=False).squeeze(-2)
+    #     out_mask = out_mask.flatten(0, 1)
+    #
+    #     tgt_mask = torch.cat(gt_mask).unsqueeze(1)
+    #     sample_points = torch.cat([a.repeat(b, 1, 1, 1) for a, b in zip(sample_points, num_gts) if b > 0])
+    #     tgt_mask = F.grid_sample(tgt_mask, sample_points, align_corners=False).squeeze([1, 2])
+    #
+    #     with torch.cuda.amp.autocast(False):
+    #         # binary cross entropy cost
+    #         pos_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction='none')
+    #         neg_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction='none')
+    #         cost_mask = torch.matmul(pos_cost_mask, tgt_mask.T) + torch.matmul(neg_cost_mask, 1 - tgt_mask.T)
+    #         cost_mask /= self.num_sample_points
+    #
+    #         # dice cost
+    #         out_mask = F.sigmoid(out_mask)
+    #         numerator = 2 * torch.matmul(out_mask, tgt_mask.T)
+    #         denominator = out_mask.sum(-1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0)
+    #         cost_dice = 1 - (numerator + 1) / (denominator + 1)
+    #
+    #         C = self.cost_gain['mask'] * cost_mask + self.cost_gain['dice'] * cost_dice
+    #     return C


 def get_cdn_group(batch,