ultralytics 8.3.38 SAM 2 video inference (#14851)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Signed-off-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Laughing 2024-11-26 19:38:23 +08:00 committed by GitHub
parent 407815cf9e
commit dcc9bd536f
16 changed files with 917 additions and 124 deletions
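
The headline change is SAM 2 video inference via a new video predictor. A minimal usage sketch following the SAM2VideoPredictor pattern in the Ultralytics docs (weights and video path are placeholders):

from ultralytics.models.sam import SAM2VideoPredictor

# Build the video predictor; sam2_b.pt is fetched automatically if not present
overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2_b.pt")
predictor = SAM2VideoPredictor(overrides=overrides)

# Prompt a single foreground point on the first frame; the mask is tracked through the video
results = predictor(source="test.mp4", points=[920, 470], labels=[1])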

ultralytics/utils/loss.py

@@ -552,9 +552,8 @@ class v8PoseLoss(v8DetectionLoss):
pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).
Returns:
- (tuple): Returns a tuple containing:
- - kpts_loss (torch.Tensor): The keypoints loss.
- - kpts_obj_loss (torch.Tensor): The keypoints object loss.
+ kpts_loss (torch.Tensor): The keypoints loss.
+ kpts_obj_loss (torch.Tensor): The keypoints object loss.
"""
batch_idx = batch_idx.flatten()
batch_size = len(masks)
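
calculate_keypoints_loss is internal to the pose training criterion rather than a user-facing call; the documented tuple is produced once per batch while a pose model trains. A minimal sketch of where it fires, assuming the standard Ultralytics pose assets:

from ultralytics import YOLO

# v8PoseLoss is built and invoked internally during pose training;
# each batch produces the (kpts_loss, kpts_obj_loss) pair documented above
model = YOLO("yolo11n-pose.pt")
model.train(data="coco8-pose.yaml", epochs=1, imgsz=640)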

ultralytics/utils/metrics.py

@@ -549,19 +549,18 @@ def ap_per_class(
prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string.
Returns:
- (tuple): A tuple of six arrays and one array of unique classes, where:
- tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,).
- fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
- p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
- r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
- f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
- ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
- unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
- p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
- r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
- f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
- x (np.ndarray): X-axis values for the curves. Shape: (1000,).
- prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
+ tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
+ r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
+ f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
+ ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
+ unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
+ p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
+ r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
+ f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
+ x (np.ndarray): X-axis values for the curves. Shape: (1000,).
+ prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
"""
# Sort by objectness
i = np.argsort(-conf)
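
ap_per_class can be exercised directly with synthetic inputs; a minimal sketch, with shapes assumed from the docstring above (n predictions scored at 10 IoU thresholds, unpacking the 12 documented return values):

import numpy as np

from ultralytics.utils.metrics import ap_per_class

n = 100
tp = np.random.rand(n, 10) > 0.5          # true-positive matrix at 10 IoU thresholds (bool)
conf = np.random.rand(n)                  # prediction confidences
pred_cls = np.random.randint(0, 3, n)     # predicted class indices
target_cls = np.random.randint(0, 3, 80)  # ground-truth class indices

tp_c, fp_c, p, r, f1, ap, classes, p_curve, r_curve, f1_curve, x, prec_values = ap_per_class(
    tp, conf, pred_cls, target_cls
)
print(ap.shape)  # (nc, 10): per-class AP at each IoU threshold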

ultralytics/utils/ops.py

@@ -317,11 +317,11 @@ def clip_boxes(boxes, shape):
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
Args:
- boxes (torch.Tensor): the bounding boxes to clip
- shape (tuple): the shape of the image
+ boxes (torch.Tensor): The bounding boxes to clip.
+ shape (tuple): The shape of the image.
Returns:
- (torch.Tensor | numpy.ndarray): Clipped boxes
+ (torch.Tensor | numpy.ndarray): The clipped boxes.
"""
if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1
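
A quick sketch of the documented behavior; note that shape is (height, width), so x coordinates clamp to shape[1] and y coordinates to shape[0]:

import torch

from ultralytics.utils.ops import clip_boxes

boxes = torch.tensor([[-5.0, 10.0, 700.0, 300.0]])  # x1, y1, x2, y2
clipped = clip_boxes(boxes, (480, 640))  # image shape (height, width)
print(clipped)  # tensor([[  0.,  10., 640., 300.]])
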
@@ -359,9 +359,9 @@ def scale_image(masks, im0_shape, ratio_pad=None):
Takes a mask, and resizes it to the original image size.
Args:
- masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
- im0_shape (tuple): the original image shape
- ratio_pad (tuple): the ratio of the padding to the original image.
+ masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3].
+ im0_shape (tuple): The original image shape.
+ ratio_pad (tuple): The ratio of the padding to the original image.
Returns:
masks (np.ndarray): The masks that are being returned with shape [h, w, num].
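
scale_image reverses the letterbox step: it trims the padding from network-size masks and resizes them back to the original image shape. A minimal sketch with assumed shapes:

import numpy as np

from ultralytics.utils.ops import scale_image

masks = np.zeros((640, 640, 2), dtype=np.uint8)  # padded masks at network size, [h, w, num]
rescaled = scale_image(masks, (480, 720, 3))  # original image shape
print(rescaled.shape)  # (480, 720, 2)
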
@@ -692,12 +692,12 @@ def process_mask_native(protos, masks_in, bboxes, shape):
Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
- masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
- bboxes (torch.Tensor): [n, 4], n is number of masks after nms
- shape (tuple): the size of the input image (h,w)
+ masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
+ bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
+ shape (tuple): The size of the input image (h,w).
Returns:
- masks (torch.Tensor): The returned masks with dimensions [h, w, n]
+ masks (torch.Tensor): The returned masks with dimensions [h, w, n].
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
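
process_mask_native projects the per-detection mask coefficients onto the prototype masks (the matrix product visible above), then upsamples and crops to each box at input-image resolution. A sketch with random tensors, dimensions taken from the docstring:

import torch

from ultralytics.utils.ops import process_mask_native

protos = torch.rand(32, 160, 160)  # [mask_dim, mask_h, mask_w]
masks_in = torch.rand(5, 32)  # [n, mask_dim], coefficients for 5 detections kept after NMS
bboxes = torch.tensor([[10.0, 10.0, 200.0, 200.0]]).repeat(5, 1)  # [n, 4] in image coordinates
masks = process_mask_native(protos, masks_in, bboxes, (640, 640))
print(masks.shape)  # one binary mask per detection at the input image size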

ultralytics/utils/plotting.py

@@ -584,8 +584,8 @@ class Annotator:
Displays queue counts on an image centered at the points with customizable font size and colors.
Args:
- label (str): queue counts label
- points (tuple): region points for center point calculation to display text
+ label (str): Queue counts label.
+ points (tuple): Region points for center point calculation to display text.
region_color (tuple): RGB queue region color.
txt_color (tuple): RGB text display color.
"""
@@ -624,13 +624,13 @@ class Annotator:
Display the bounding boxes labels in parking management app.
Args:
- im0 (ndarray): inference image
- text (str): object/class name
- txt_color (tuple): display color for text foreground
- bg_color (tuple): display color for text background
- x_center (float): x position center point for bounding box
- y_center (float): y position center point for bounding box
- margin (int): gap between text and rectangle for better display
+ im0 (ndarray): Inference image.
+ text (str): Object/class name.
+ txt_color (tuple): Display color for text foreground.
+ bg_color (tuple): Display color for text background.
+ x_center (float): The x position center point for bounding box.
+ y_center (float): The y position center point for bounding box.
+ margin (int): The gap between text and rectangle for better display.
"""
text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]
text_x = x_center - text_size[0] // 2
@@ -648,11 +648,11 @@ class Annotator:
Display the overall statistics for parking lots.
Args:
- im0 (ndarray): inference image
- text (dict): labels dictionary
- txt_color (tuple): display color for text foreground
- bg_color (tuple): display color for text background
- margin (int): gap between text and rectangle for better display
+ im0 (ndarray): Inference image.
+ text (dict): Labels dictionary.
+ txt_color (tuple): Display color for text foreground.
+ bg_color (tuple): Display color for text background.
+ margin (int): Gap between text and rectangle for better display.
"""
horizontal_gap = int(im0.shape[1] * 0.02)
vertical_gap = int(im0.shape[0] * 0.01)
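
A combined sketch of the two parking-management overlays, with hypothetical colors, positions, and counts (cv2 Annotator backend assumed; both methods draw directly on im0):

import cv2
import numpy as np

from ultralytics.utils.plotting import Annotator

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a video frame
annotator = Annotator(frame)

# One label at a hypothetical box center, then the overall lot statistics
annotator.display_objects_labels(frame, "car", (255, 255, 255), (0, 0, 255), 320.0, 240.0, 10)
annotator.display_analytics(frame, {"Occupied": "5", "Available": "7"}, (255, 255, 255), (0, 0, 0), 10)
cv2.imwrite("parking.jpg", frame)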