ultralytics 8.2.70 Segment Anything Model 2 (SAM 2) (#14813)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2024-07-30 22:06:49 +08:00 · 2024-07-30 22:06:49 +08:00 · 8648572809
commit 8648572809
parent 80f699ae21
36 changed files with 3276 additions and 77 deletions
--- a/ultralytics/models/sam/modules/decoders.py
+++ b/ultralytics/models/sam/modules/decoders.py
@@ -4,9 +4,8 @@ from typing import List, Tuple, Type

 import torch
 from torch import nn
-from torch.nn import functional as F

-from ultralytics.nn.modules import LayerNorm2d
+from ultralytics.nn.modules import MLP, LayerNorm2d


 class MaskDecoder(nn.Module):
@@ -28,7 +27,6 @@ class MaskDecoder(nn.Module):

    def __init__(
        self,
-        *,
        transformer_dim: int,
        transformer: nn.Module,
        num_multimask_outputs: int = 3,
@@ -149,42 +147,3 @@ class MaskDecoder(nn.Module):
        iou_pred = self.iou_prediction_head(iou_token_out)

        return masks, iou_pred
-
-
-class MLP(nn.Module):
-    """
-    MLP (Multi-Layer Perceptron) model lightly adapted from
-    https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
-    """
-
-    def __init__(
-        self,
-        input_dim: int,
-        hidden_dim: int,
-        output_dim: int,
-        num_layers: int,
-        sigmoid_output: bool = False,
-    ) -> None:
-        """
-        Initializes the MLP (Multi-Layer Perceptron) model.
-
-        Args:
-            input_dim (int): The dimensionality of the input features.
-            hidden_dim (int): The dimensionality of the hidden layers.
-            output_dim (int): The dimensionality of the output layer.
-            num_layers (int): The number of hidden layers.
-            sigmoid_output (bool, optional): Apply a sigmoid activation to the output layer. Defaults to False.
-        """
-        super().__init__()
-        self.num_layers = num_layers
-        h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
-        self.sigmoid_output = sigmoid_output
-
-    def forward(self, x):
-        """Executes feedforward within the neural network module and applies activation."""
-        for i, layer in enumerate(self.layers):
-            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
-        if self.sigmoid_output:
-            x = torch.sigmoid(x)
-        return x