ultralytics 8.0.197 save P, R, F1 curves to metrics (#5354)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com>
2023-10-13 02:49:31 +02:00 · 2023-10-13 02:49:31 +02:00 · 12e3eef844
commit 12e3eef844
parent 7fd5dcbd86
33 changed files with 337 additions and 195 deletions
--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):

    def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
        """Positionally encode points that are normalized to [0,1]."""
-        # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
-        # outputs d_1 x ... x d_n x C shape
+        # Outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -429,7 +429,7 @@ class Attention(nn.Module):
        self.use_rel_pos = use_rel_pos
        if self.use_rel_pos:
            assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            # initialize relative positional embeddings
+            # Initialize relative positional embeddings
            self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
            self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -172,7 +172,7 @@ class ConvLayer(nn.Module):
        self.depth = depth
        self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
        self.blocks = nn.ModuleList([
            MBConv(
                dim,
@@ -182,7 +182,7 @@ class ConvLayer(nn.Module):
                drop_path[i] if isinstance(drop_path, list) else drop_path,
            ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
        self.downsample = None if downsample is None else downsample(
            input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module):
            pH, pW = H + pad_b, W + pad_r
            nH = pH // self.window_size
            nW = pW // self.window_size
-            # window partition
+            # Window partition
            x = x.view(B, nH, self.window_size, nW, self.window_size,
                       C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C)
            x = self.attn(x)
-            # window reverse
+            # Window reverse
            x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)

            if padding:
@@ -467,7 +467,7 @@ class BasicLayer(nn.Module):
        self.depth = depth
        self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
        self.blocks = nn.ModuleList([
            TinyViTBlock(
                dim=dim,
@@ -481,7 +481,7 @@ class BasicLayer(nn.Module):
                activation=activation,
            ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
        self.downsample = None if downsample is None else downsample(
            input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -593,10 +593,10 @@ class TinyViT(nn.Module):
        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

-        # stochastic depth
+        # Stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

-        # build layers
+        # Build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            kwargs = dict(
@@ -628,7 +628,7 @@ class TinyViT(nn.Module):
        self.norm_head = nn.LayerNorm(embed_dims[-1])
        self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()

-        # init weights
+        # Init weights
        self.apply(self._init_weights)
        self.set_layer_lr_decay(layer_lr_decay)
        self.neck = nn.Sequential(
@@ -653,7 +653,7 @@ class TinyViT(nn.Module):
        """Sets the learning rate decay for each layer in the TinyViT model."""
        decay_rate = layer_lr_decay

-        # layers -> blocks (depth)
+        # Layers -> blocks (depth)
        depth = sum(self.depths)
        lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]

--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -414,8 +414,7 @@ class Predictor(BasePredictor):
            unchanged = unchanged and not changed

            new_masks.append(torch.as_tensor(mask).unsqueeze(0))
-            # Give score=0 to changed masks and score=1 to unchanged masks
-            # so NMS will prefer ones that didn't need postprocessing
+            # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
            scores.append(float(unchanged))

        # Recalculate boxes and remove any new duplicates