ultralytics 8.1.44 add IS_RASPBERRYPI and constants refactor (#9827)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
parent 3f34a7c3af
commit 7d891a4aa4
43 changed files with 146 additions and 141 deletions
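The IS_RASPBERRYPI constant named in the title is introduced alongside the constants refactor (presumably in ultralytics/utils/__init__.py); that hunk is not reproduced below, so the following is only a hedged sketch of the usual device-tree detection pattern such a constant relies on. The function name and file read here are assumptions, not copied from the commit:

# Hedged sketch only -- not the commit's code. Assumes the common Linux check of
# /proc/device-tree/model for the "Raspberry Pi" model string.
from pathlib import Path


def is_raspberrypi() -> bool:
    """Return True when running on a Raspberry Pi, judged from the device-tree model string."""
    try:
        return "Raspberry Pi" in Path("/proc/device-tree/model").read_text()
    except OSError:
        return False


IS_RASPBERRYPI = is_raspberrypi()  # evaluated once at import time, like other module constants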
@@ -125,7 +125,7 @@ class RTDETRValidator(DetectionValidator):
             bbox = ops.xywh2xyxy(bbox)  # target boxes
             bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
             bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares and returns a batch with transformed bounding boxes and class labels."""
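Several hunks in this commit make the same swap from the dict() constructor to a dict literal when building the validation batch. A minimal standalone sketch of the equivalence (values are illustrative, not taken from the diff):

cls, bbox = [0, 1], [[10, 20, 30, 40]]
a = dict(cls=cls, bbox=bbox)     # constructor form (removed): keys must be valid identifiers
b = {"cls": cls, "bbox": bbox}   # literal form (added): no name lookup or call, keys are plain strings
assert a == b                    # identical contents either way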
@@ -584,9 +584,9 @@ class TinyViT(nn.Module):
             img_size (int, optional): The input image size. Defaults to 224.
             in_chans (int, optional): Number of input channels. Defaults to 3.
             num_classes (int, optional): Number of classification classes. Defaults to 1000.
-            embed_dims (List[int], optional): List of embedding dimensions for each layer. Defaults to [96, 192, 384, 768].
+            embed_dims (List[int], optional): List of embedding dimensions per layer. Defaults to [96, 192, 384, 768].
             depths (List[int], optional): List of depths for each layer. Defaults to [2, 2, 6, 2].
-            num_heads (List[int], optional): List of number of attention heads for each layer. Defaults to [3, 6, 12, 24].
+            num_heads (List[int], optional): List of number of attention heads per layer. Defaults to [3, 6, 12, 24].
             window_sizes (List[int], optional): List of window sizes for each layer. Defaults to [7, 7, 14, 7].
             mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension. Defaults to 4.
             drop_rate (float, optional): Dropout rate. Defaults to 0.
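These Args map one-to-one onto the TinyViT constructor. A hedged instantiation sketch using the documented defaults (the import path is assumed from the SAM module layout, not shown in this diff):

from ultralytics.models.sam.modules.tiny_encoder import TinyViT  # assumed module path

model = TinyViT(
    img_size=224,
    in_chans=3,
    num_classes=1000,
    embed_dims=[96, 192, 384, 768],  # one embedding dimension per layer
    depths=[2, 2, 6, 2],             # blocks per layer
    num_heads=[3, 6, 12, 24],        # attention heads per layer
    window_sizes=[7, 7, 14, 7],
    mlp_ratio=4.0,
    drop_rate=0.0,
)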
@@ -222,7 +222,7 @@ class Attention(nn.Module):
             downsample_rate (int, optional): The factor by which the internal dimensions are downsampled. Defaults to 1.

         Raises:
-            AssertionError: If 'num_heads' does not evenly divide the internal dimension (embedding_dim / downsample_rate).
+            AssertionError: If 'num_heads' does not evenly divide the internal dim (embedding_dim / downsample_rate).
         """
         super().__init__()
         self.embedding_dim = embedding_dim
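The Raises entry documents a plain divisibility guard; a minimal sketch of that check in isolation, with illustrative values:

embedding_dim, downsample_rate, num_heads = 256, 2, 8
internal_dim = embedding_dim // downsample_rate  # 128
assert internal_dim % num_heads == 0, "num_heads must evenly divide embedding_dim // downsample_rate"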
@@ -127,10 +127,10 @@ class Predictor(BasePredictor):
         Args:
             im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
             bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
-            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixel coordinates.
-            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 for foreground and 0 for background.
-            masks (np.ndarray, optional): Low-resolution masks from previous predictions. Shape should be (N, H, W). For SAM, H=W=256.
-            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
+            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+            masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
+            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts.

         Returns:
             (tuple): Contains the following three elements.
@@ -156,10 +156,10 @@ class Predictor(BasePredictor):
         Args:
             im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
             bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
-            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixel coordinates.
-            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 for foreground and 0 for background.
-            masks (np.ndarray, optional): Low-resolution masks from previous predictions. Shape should be (N, H, W). For SAM, H=W=256.
-            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
+            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+            masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
+            multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts.

         Returns:
             (tuple): Contains the following three elements.
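The prompt arguments documented in the two hunks above are also exposed through the high-level SAM API. A hedged usage sketch (checkpoint name and image path are placeholders):

from ultralytics import SAM

model = SAM("sam_b.pt")                              # placeholder checkpoint
model("image.jpg", bboxes=[100, 100, 200, 200])      # box prompt, XYXY pixels
model("image.jpg", points=[[150, 150]], labels=[1])  # point prompt, 1 = foreground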
@@ -230,7 +230,7 @@ class Predictor(BasePredictor):
             im (torch.Tensor): Input tensor representing the preprocessed image with dimensions (N, C, H, W).
             crop_n_layers (int): Specifies the number of layers for additional mask predictions on image crops.
                 Each layer produces 2**i_layer number of image crops.
-            crop_overlap_ratio (float): Determines the extent of overlap between crops. Scaled down in subsequent layers.
+            crop_overlap_ratio (float): Determines the overlap between crops. Scaled down in subsequent layers.
             crop_downscale_factor (int): Scaling factor for the number of sampled points-per-side in each layer.
             point_grids (list[np.ndarray], optional): Custom grids for point sampling normalized to [0,1].
                 Used in the nth crop layer.
@@ -240,7 +240,7 @@ class Predictor(BasePredictor):
             conf_thres (float): Confidence threshold [0,1] for filtering based on the model's mask quality prediction.
             stability_score_thresh (float): Stability threshold [0,1] for mask filtering based on mask stability.
             stability_score_offset (float): Offset value for calculating stability score.
-            crop_nms_thresh (float): IoU cutoff for Non-Maximum Suppression (NMS) to remove duplicate masks between crops.
+            crop_nms_thresh (float): IoU cutoff for NMS to remove duplicate masks between crops.

         Returns:
             (tuple): A tuple containing segmented masks, confidence scores, and bounding boxes.
@@ -351,8 +351,8 @@ class Predictor(BasePredictor):
         """
         Post-processes SAM's inference outputs to generate object detection masks and bounding boxes.

-        The method scales masks and boxes to the original image size and applies a threshold to the mask predictions. The
-        SAM model uses advanced architecture and promptable segmentation tasks to achieve real-time performance.
+        The method scales masks and boxes to the original image size and applies a threshold to the mask predictions.
+        The SAM model uses advanced architecture and promptable segmentation tasks to achieve real-time performance.

         Args:
             preds (tuple): The output from SAM model inference, containing masks, scores, and optional bounding boxes.
@@ -106,7 +106,7 @@ class DetectionValidator(BaseValidator):
         if len(cls):
             bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares a batch of images and annotations for validation."""
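The unchanged context lines show the label-preparation idiom these validators share: convert normalized xywh labels to pixel xyxy on the network input size, then rescale to the original image. A hedged standalone sketch of the same conversion (shapes and values are illustrative):

import torch

from ultralytics.utils import ops

imgsz = (480, 640)                             # (h, w) of the letterboxed network input
ori_shape = (720, 1280)                        # (h, w) of the original image
bbox = torch.tensor([[0.5, 0.5, 0.25, 0.25]])  # normalized cx, cy, w, h label
bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz)[[1, 0, 1, 0]]  # pixel xyxy on imgsz
bbox = ops.scale_boxes(imgsz, bbox, ori_shape)  # map back to original image space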
@@ -5,7 +5,7 @@ from pathlib import Path
 from ultralytics.engine.model import Model
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel, WorldModel
-from ultralytics.utils import yaml_load, ROOT
+from ultralytics.utils import ROOT, yaml_load


 class YOLO(Model):
@@ -78,7 +78,7 @@ class OBBValidator(DetectionValidator):
         if len(cls):
             bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
-        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares and returns a batch for OBB validation with scaled and padded bounding boxes."""
@@ -1,8 +1,8 @@
-from ultralytics.data import build_yolo_dataset, build_grounding, YOLOConcatDataset
+from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
-from ultralytics.utils.torch_utils import de_parallel
 from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils.torch_utils import de_parallel


 class WorldTrainerFromScratch(WorldTrainer):
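The last hunk only reorders imports; the class itself is used by passing it as the trainer. A hedged usage sketch modeled on the class docstring (dataset YAMLs, paths, and the model config are placeholders, not part of this commit):

from ultralytics import YOLOWorld
from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

data = {
    "train": {
        "yolo_data": ["Objects365.yaml"],  # placeholder detection dataset
        "grounding_data": [
            {"img_path": "flickr30k/images", "json_file": "flickr30k/final_flickr_separateGT_train.json"}
        ],
    },
    "val": {"yolo_data": ["lvis.yaml"]},
}
model = YOLOWorld("yolov8s-worldv2.yaml")
model.train(data=data, trainer=WorldTrainerFromScratch, epochs=1)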