ultralytics 8.3.38 SAM 2 video inference (#14851)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Signed-off-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
This commit is contained in:
parent
407815cf9e
commit
dcc9bd536f
16 changed files with 917 additions and 124 deletions
|
|
@ -552,9 +552,8 @@ class v8PoseLoss(v8DetectionLoss):
|
|||
pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).
|
||||
|
||||
Returns:
|
||||
(tuple): Returns a tuple containing:
|
||||
- kpts_loss (torch.Tensor): The keypoints loss.
|
||||
- kpts_obj_loss (torch.Tensor): The keypoints object loss.
|
||||
kpts_loss (torch.Tensor): The keypoints loss.
|
||||
kpts_obj_loss (torch.Tensor): The keypoints object loss.
|
||||
"""
|
||||
batch_idx = batch_idx.flatten()
|
||||
batch_size = len(masks)
|
||||
|
|
|
|||
|
|
@ -549,19 +549,18 @@ def ap_per_class(
|
|||
prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string.
|
||||
|
||||
Returns:
|
||||
(tuple): A tuple of six arrays and one array of unique classes, where:
|
||||
tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
|
||||
unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
|
||||
p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
|
||||
r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
|
||||
f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
|
||||
x (np.ndarray): X-axis values for the curves. Shape: (1000,).
|
||||
prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
|
||||
tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
|
||||
ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
|
||||
unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
|
||||
p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
|
||||
r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
|
||||
f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
|
||||
x (np.ndarray): X-axis values for the curves. Shape: (1000,).
|
||||
prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
|
||||
"""
|
||||
# Sort by objectness
|
||||
i = np.argsort(-conf)
|
||||
|
|
|
|||
|
|
@ -317,11 +317,11 @@ def clip_boxes(boxes, shape):
|
|||
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
|
||||
|
||||
Args:
|
||||
boxes (torch.Tensor): the bounding boxes to clip
|
||||
shape (tuple): the shape of the image
|
||||
boxes (torch.Tensor): The bounding boxes to clip.
|
||||
shape (tuple): The shape of the image.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor | numpy.ndarray): Clipped boxes
|
||||
(torch.Tensor | numpy.ndarray): The clipped boxes.
|
||||
"""
|
||||
if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
|
||||
boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1
|
||||
|
|
@ -359,9 +359,9 @@ def scale_image(masks, im0_shape, ratio_pad=None):
|
|||
Takes a mask, and resizes it to the original image size.
|
||||
|
||||
Args:
|
||||
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
|
||||
im0_shape (tuple): the original image shape
|
||||
ratio_pad (tuple): the ratio of the padding to the original image.
|
||||
masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3].
|
||||
im0_shape (tuple): The original image shape.
|
||||
ratio_pad (tuple): The ratio of the padding to the original image.
|
||||
|
||||
Returns:
|
||||
masks (np.ndarray): The masks that are being returned with shape [h, w, num].
|
||||
|
|
@ -692,12 +692,12 @@ def process_mask_native(protos, masks_in, bboxes, shape):
|
|||
|
||||
Args:
|
||||
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
|
||||
masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
|
||||
bboxes (torch.Tensor): [n, 4], n is number of masks after nms
|
||||
shape (tuple): the size of the input image (h,w)
|
||||
masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
|
||||
bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
|
||||
shape (tuple): The size of the input image (h,w).
|
||||
|
||||
Returns:
|
||||
masks (torch.Tensor): The returned masks with dimensions [h, w, n]
|
||||
masks (torch.Tensor): The returned masks with dimensions [h, w, n].
|
||||
"""
|
||||
c, mh, mw = protos.shape # CHW
|
||||
masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
|
||||
|
|
|
|||
|
|
@ -584,8 +584,8 @@ class Annotator:
|
|||
Displays queue counts on an image centered at the points with customizable font size and colors.
|
||||
|
||||
Args:
|
||||
label (str): queue counts label
|
||||
points (tuple): region points for center point calculation to display text
|
||||
label (str): Queue counts label.
|
||||
points (tuple): Region points for center point calculation to display text.
|
||||
region_color (tuple): RGB queue region color.
|
||||
txt_color (tuple): RGB text display color.
|
||||
"""
|
||||
|
|
@ -624,13 +624,13 @@ class Annotator:
|
|||
Display the bounding boxes labels in parking management app.
|
||||
|
||||
Args:
|
||||
im0 (ndarray): inference image
|
||||
text (str): object/class name
|
||||
txt_color (tuple): display color for text foreground
|
||||
bg_color (tuple): display color for text background
|
||||
x_center (float): x position center point for bounding box
|
||||
y_center (float): y position center point for bounding box
|
||||
margin (int): gap between text and rectangle for better display
|
||||
im0 (ndarray): Inference image.
|
||||
text (str): Object/class name.
|
||||
txt_color (tuple): Display color for text foreground.
|
||||
bg_color (tuple): Display color for text background.
|
||||
x_center (float): The x position center point for bounding box.
|
||||
y_center (float): The y position center point for bounding box.
|
||||
margin (int): The gap between text and rectangle for better display.
|
||||
"""
|
||||
text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]
|
||||
text_x = x_center - text_size[0] // 2
|
||||
|
|
@ -648,11 +648,11 @@ class Annotator:
|
|||
Display the overall statistics for parking lots.
|
||||
|
||||
Args:
|
||||
im0 (ndarray): inference image
|
||||
text (dict): labels dictionary
|
||||
txt_color (tuple): display color for text foreground
|
||||
bg_color (tuple): display color for text background
|
||||
margin (int): gap between text and rectangle for better display
|
||||
im0 (ndarray): Inference image.
|
||||
text (dict): Labels dictionary.
|
||||
txt_color (tuple): Display color for text foreground.
|
||||
bg_color (tuple): Display color for text background.
|
||||
margin (int): Gap between text and rectangle for better display.
|
||||
"""
|
||||
horizontal_gap = int(im0.shape[1] * 0.02)
|
||||
vertical_gap = int(im0.shape[0] * 0.01)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue