ultralytics 8.0.197 save P, R, F1 curves to metrics (#5354)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com>
2023-10-13 02:49:31 +02:00 · 2023-10-13 02:49:31 +02:00 · 12e3eef844
commit 12e3eef844
parent 7fd5dcbd86
33 changed files with 337 additions and 195 deletions
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -120,7 +120,7 @@ class FastSAMPrompt:
            result_name = os.path.basename(ann.path)
            image = ann.orig_img[..., ::-1]  # BGR to RGB
            original_h, original_w = ann.orig_shape
-            # for macOS only
+            # For macOS only
            # plt.switch_backend('TkAgg')
            plt.figure(figsize=(original_w / 100, original_h / 100))
            # Add subplot with no margin.
--- a/ultralytics/models/fastsam/utils.py
+++ b/ultralytics/models/fastsam/utils.py
@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=Fals
        high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
    """
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    # obtain coordinates for intersections
+    # Obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

-    # compute the area of intersection
+    # Compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

-    # compute the area of both individual boxes
+    # Compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

-    # compute the area of union
+    # Compute the area of union
    union = box1_area + box2_area - intersection

-    # compute the IoU
+    # Compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        return 0 if iou.numel() == 0 else iou
--- a/ultralytics/models/rtdetr/val.py
+++ b/ultralytics/models/rtdetr/val.py
@@ -99,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
        for i, bbox in enumerate(bboxes):  # (300, 4)
            bbox = ops.xywh2xyxy(bbox)
            score, cls = scores[i].max(-1)  # (300, )
-            # Do not need threshold for evaluation as only got 300 boxes here.
+            # Do not need threshold for evaluation as only got 300 boxes here
            # idx = score > self.args.conf
            pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)  # filter
-            # sort by confidence to correctly get internal metrics.
+            # Sort by confidence to correctly get internal metrics
            pred = pred[score.argsort(descending=True)]
            outputs[i] = pred  # [idx]

--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):

    def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
        """Positionally encode points that are normalized to [0,1]."""
-        # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
-        # outputs d_1 x ... x d_n x C shape
+        # Outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -429,7 +429,7 @@ class Attention(nn.Module):
        self.use_rel_pos = use_rel_pos
        if self.use_rel_pos:
            assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            # initialize relative positional embeddings
+            # Initialize relative positional embeddings
            self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
            self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -172,7 +172,7 @@ class ConvLayer(nn.Module):
        self.depth = depth
        self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
        self.blocks = nn.ModuleList([
            MBConv(
                dim,
@@ -182,7 +182,7 @@ class ConvLayer(nn.Module):
                drop_path[i] if isinstance(drop_path, list) else drop_path,
            ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
        self.downsample = None if downsample is None else downsample(
            input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module):
            pH, pW = H + pad_b, W + pad_r
            nH = pH // self.window_size
            nW = pW // self.window_size
-            # window partition
+            # Window partition
            x = x.view(B, nH, self.window_size, nW, self.window_size,
                       C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C)
            x = self.attn(x)
-            # window reverse
+            # Window reverse
            x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)

            if padding:
@@ -467,7 +467,7 @@ class BasicLayer(nn.Module):
        self.depth = depth
        self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
        self.blocks = nn.ModuleList([
            TinyViTBlock(
                dim=dim,
@@ -481,7 +481,7 @@ class BasicLayer(nn.Module):
                activation=activation,
            ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
        self.downsample = None if downsample is None else downsample(
            input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -593,10 +593,10 @@ class TinyViT(nn.Module):
        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

-        # stochastic depth
+        # Stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

-        # build layers
+        # Build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            kwargs = dict(
@@ -628,7 +628,7 @@ class TinyViT(nn.Module):
        self.norm_head = nn.LayerNorm(embed_dims[-1])
        self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()

-        # init weights
+        # Init weights
        self.apply(self._init_weights)
        self.set_layer_lr_decay(layer_lr_decay)
        self.neck = nn.Sequential(
@@ -653,7 +653,7 @@ class TinyViT(nn.Module):
        """Sets the learning rate decay for each layer in the TinyViT model."""
        decay_rate = layer_lr_decay

-        # layers -> blocks (depth)
+        # Layers -> blocks (depth)
        depth = sum(self.depths)
        lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]

--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -414,8 +414,7 @@ class Predictor(BasePredictor):
            unchanged = unchanged and not changed

            new_masks.append(torch.as_tensor(mask).unsqueeze(0))
-            # Give score=0 to changed masks and score=1 to unchanged masks
-            # so NMS will prefer ones that didn't need postprocessing
+            # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
            scores.append(float(unchanged))

        # Recalculate boxes and remove any new duplicates
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -66,7 +66,7 @@ class DETRLoss(nn.Module):

    def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
        """Computes the classification loss based on predictions, target values, and ground truth scores."""
-        # logits: [b, query, num_classes], gt_class: list[[n, 1]]
+        # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
        name_class = f'loss_class{postfix}'
        bs, nq = pred_scores.shape[:2]
        # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1]  # (bs, num_queries, num_classes)
@@ -90,7 +90,7 @@ class DETRLoss(nn.Module):
        """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
        boxes.
        """
-        # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
+        # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
        name_bbox = f'loss_bbox{postfix}'
        name_giou = f'loss_giou{postfix}'

--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -188,7 +188,7 @@ def get_cdn_group(batch,

    num_group = num_dn // max_nums
    num_group = 1 if num_group == 0 else num_group
-    # pad gt to max_num of a batch
+    # Pad gt to max_num of a batch
    bs = len(gt_groups)
    gt_cls = batch['cls']  # (bs*num, )
    gt_bbox = batch['bboxes']  # bs*num, 4
@@ -204,10 +204,10 @@ def get_cdn_group(batch,
    neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

    if cls_noise_ratio > 0:
-        # half of bbox prob
+        # Half of bbox prob
        mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
        idx = torch.nonzero(mask).squeeze(-1)
-        # randomly put a new one here
+        # Randomly put a new one here
        new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
        dn_cls[idx] = new_label

@@ -240,9 +240,9 @@ def get_cdn_group(batch,

    tgt_size = num_dn + num_queries
    attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
-    # match query cannot see the reconstruct
+    # Match query cannot see the reconstruct
    attn_mask[num_dn:, :num_dn] = True
-    # reconstruct cannot see each other
+    # Reconstruct cannot see each other
    for i in range(num_group):
        if i == 0:
            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True