ultralytics 8.1.44 add IS_RASPBERRYPI and constants refactor (#9827)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
parent 3f34a7c3af
commit 7d891a4aa4
43 changed files with 146 additions and 141 deletions
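The IS_RASPBERRYPI constant named in the title is introduced alongside the constants refactor (presumably in ultralytics/utils/__init__.py); that hunk is not reproduced below, so the following is only a hedged sketch of the usual device-tree detection pattern such a constant relies on. The function name and file read here are assumptions, not copied from the commit:

# Hedged sketch only -- not the commit's code. Assumes the common Linux check of
# /proc/device-tree/model for the "Raspberry Pi" model string.
from pathlib import Path


def is_raspberrypi() -> bool:
    """Return True when running on a Raspberry Pi, judged from the device-tree model string."""
    try:
        return "Raspberry Pi" in Path("/proc/device-tree/model").read_text()
    except OSError:
        return False


IS_RASPBERRYPI = is_raspberrypi()  # evaluated once at import time, like other module constants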
@@ -125,7 +125,7 @@ class RTDETRValidator(DetectionValidator):
             bbox = ops.xywh2xyxy(bbox)  # target boxes
             bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
             bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares and returns a batch with transformed bounding boxes and class labels."""
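Several hunks in this commit make the same swap from the dict() constructor to a dict literal when building the validation batch. A minimal standalone sketch of the equivalence (values are illustrative, not taken from the diff):

cls, bbox = [0, 1], [[10, 20, 30, 40]]
a = dict(cls=cls, bbox=bbox)     # constructor form (removed): keys must be valid identifiers
b = {"cls": cls, "bbox": bbox}   # literal form (added): no name lookup or call, keys are plain strings
assert a == b                    # identical contents either way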
@@ -584,9 +584,9 @@ class TinyViT(nn.Module):
             img_size (int, optional): The input image size. Defaults to 224.
             in_chans (int, optional): Number of input channels. Defaults to 3.
             num_classes (int, optional): Number of classification classes. Defaults to 1000.
-            embed_dims (List[int], optional): List of embedding dimensions for each layer. Defaults to [96, 192, 384, 768].
+            embed_dims (List[int], optional): List of embedding dimensions per layer. Defaults to [96, 192, 384, 768].
             depths (List[int], optional): List of depths for each layer. Defaults to [2, 2, 6, 2].
-            num_heads (List[int], optional): List of number of attention heads for each layer. Defaults to [3, 6, 12, 24].
+            num_heads (List[int], optional): List of number of attention heads per layer. Defaults to [3, 6, 12, 24].
             window_sizes (List[int], optional): List of window sizes for each layer. Defaults to [7, 7, 14, 7].
             mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension. Defaults to 4.
             drop_rate (float, optional): Dropout rate. Defaults to 0.
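These Args map one-to-one onto the TinyViT constructor. A hedged instantiation sketch using the documented defaults (the import path is assumed from the SAM module layout, not shown in this diff):

from ultralytics.models.sam.modules.tiny_encoder import TinyViT  # assumed module path

model = TinyViT(
    img_size=224,
    in_chans=3,
    num_classes=1000,
    embed_dims=[96, 192, 384, 768],  # one embedding dimension per layer
    depths=[2, 2, 6, 2],             # blocks per layer
    num_heads=[3, 6, 12, 24],        # attention heads per layer
    window_sizes=[7, 7, 14, 7],
    mlp_ratio=4.0,
    drop_rate=0.0,
)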
@@ -222,7 +222,7 @@ class Attention(nn.Module):
             downsample_rate (int, optional): The factor by which the internal dimensions are downsampled. Defaults to 1.

         Raises:
-            AssertionError: If 'num_heads' does not evenly divide the internal dimension (embedding_dim / downsample_rate).
+            AssertionError: If 'num_heads' does not evenly divide the internal dim (embedding_dim / downsample_rate).
         """
         super().__init__()
         self.embedding_dim = embedding_dim
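The Raises entry documents a plain divisibility guard; a minimal sketch of that check in isolation, with illustrative values:

embedding_dim, downsample_rate, num_heads = 256, 2, 8
internal_dim = embedding_dim // downsample_rate  # 128
assert internal_dim % num_heads == 0, "num_heads must evenly divide embedding_dim // downsample_rate"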
@@ -127,10 +127,10 @@ class Predictor(BasePredictor):
         Args:
             im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
             bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
-            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixel coordinates.
-            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 for foreground and 0 for background.
-            masks (np.ndarray, optional): Low-resolution masks from previous predictions. Shape should be (N, H, W). For SAM, H=W=256.
-            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
+            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+            masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
+            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts.

         Returns:
             (tuple): Contains the following three elements.
@@ -156,10 +156,10 @@ class Predictor(BasePredictor):
         Args:
             im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
             bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
-            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixel coordinates.
-            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 for foreground and 0 for background.
-            masks (np.ndarray, optional): Low-resolution masks from previous predictions. Shape should be (N, H, W). For SAM, H=W=256.
-            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
+            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+            masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
+            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts.

         Returns:
             (tuple): Contains the following three elements.
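The prompt arguments documented in the two hunks above are also exposed through the high-level SAM API. A hedged usage sketch (checkpoint name and image path are placeholders):

from ultralytics import SAM

model = SAM("sam_b.pt")                              # placeholder checkpoint
model("image.jpg", bboxes=[100, 100, 200, 200])      # box prompt, XYXY pixels
model("image.jpg", points=[[150, 150]], labels=[1])  # point prompt, 1 = foreground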
@@ -230,7 +230,7 @@ class Predictor(BasePredictor):
             im (torch.Tensor): Input tensor representing the preprocessed image with dimensions (N, C, H, W).
             crop_n_layers (int): Specifies the number of layers for additional mask predictions on image crops.
                 Each layer produces 2**i_layer number of image crops.
-            crop_overlap_ratio (float): Determines the extent of overlap between crops. Scaled down in subsequent layers.
+            crop_overlap_ratio (float): Determines the overlap between crops. Scaled down in subsequent layers.
             crop_downscale_factor (int): Scaling factor for the number of sampled points-per-side in each layer.
             point_grids (list[np.ndarray], optional): Custom grids for point sampling normalized to [0,1].
                 Used in the nth crop layer.
@@ -240,7 +240,7 @@ class Predictor(BasePredictor):
             conf_thres (float): Confidence threshold [0,1] for filtering based on the model's mask quality prediction.
             stability_score_thresh (float): Stability threshold [0,1] for mask filtering based on mask stability.
             stability_score_offset (float): Offset value for calculating stability score.
-            crop_nms_thresh (float): IoU cutoff for Non-Maximum Suppression (NMS) to remove duplicate masks between crops.
+            crop_nms_thresh (float): IoU cutoff for NMS to remove duplicate masks between crops.

         Returns:
             (tuple): A tuple containing segmented masks, confidence scores, and bounding boxes.
@@ -351,8 +351,8 @@ class Predictor(BasePredictor):
         """
         Post-processes SAM's inference outputs to generate object detection masks and bounding boxes.

-        The method scales masks and boxes to the original image size and applies a threshold to the mask predictions. The
-        SAM model uses advanced architecture and promptable segmentation tasks to achieve real-time performance.
+        The method scales masks and boxes to the original image size and applies a threshold to the mask predictions.
+        The SAM model uses advanced architecture and promptable segmentation tasks to achieve real-time performance.

         Args:
             preds (tuple): The output from SAM model inference, containing masks, scores, and optional bounding boxes.
@@ -106,7 +106,7 @@ class DetectionValidator(BaseValidator):
         if len(cls):
             bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares a batch of images and annotations for validation."""
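The unchanged context lines show the label-preparation idiom these validators share: convert normalized xywh labels to pixel xyxy on the network input size, then rescale to the original image. A hedged standalone sketch of the same conversion (shapes and values are illustrative):

import torch

from ultralytics.utils import ops

imgsz = (480, 640)                             # (h, w) of the letterboxed network input
ori_shape = (720, 1280)                        # (h, w) of the original image
bbox = torch.tensor([[0.5, 0.5, 0.25, 0.25]])  # normalized cx, cy, w, h label
bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz)[[1, 0, 1, 0]]  # pixel xyxy on imgsz
bbox = ops.scale_boxes(imgsz, bbox, ori_shape)  # map back to original image space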
@@ -5,7 +5,7 @@ from pathlib import Path
 from ultralytics.engine.model import Model
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel, WorldModel
-from ultralytics.utils import yaml_load, ROOT
+from ultralytics.utils import ROOT, yaml_load


 class YOLO(Model):
@@ -78,7 +78,7 @@ class OBBValidator(DetectionValidator):
         if len(cls):
             bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares and returns a batch for OBB validation with scaled and padded bounding boxes."""
@@ -1,8 +1,8 @@
-from ultralytics.data import build_yolo_dataset, build_grounding, YOLOConcatDataset
+from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
-from ultralytics.utils.torch_utils import de_parallel
 from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils.torch_utils import de_parallel


 class WorldTrainerFromScratch(WorldTrainer):
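The last hunk only reorders imports; the class itself is used by passing it as the trainer. A hedged usage sketch modeled on the class docstring (dataset YAMLs, paths, and the model config are placeholders, not part of this commit):

from ultralytics import YOLOWorld
from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

data = {
    "train": {
        "yolo_data": ["Objects365.yaml"],  # placeholder detection dataset
        "grounding_data": [
            {"img_path": "flickr30k/images", "json_file": "flickr30k/final_flickr_separateGT_train.json"}
        ],
    },
    "val": {"yolo_data": ["lvis.yaml"]},
}
model = YOLOWorld("yolov8s-worldv2.yaml")
model.train(data=data, trainer=WorldTrainerFromScratch, epochs=1)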