PyCharm Code and Docs Inspect fixes v1 (#18461)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Parent: 126867e355
Commit: 7f1a50e893
26 changed files with 90 additions and 91 deletions
@@ -642,7 +642,7 @@ class Mosaic(BaseMixTransform):
      c = s - w, s + h0 - h, s, s + h0
      padw, padh = c[:2]
-     x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords
+     x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coordinates
      img3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img3[ymin:ymax, xmin:xmax]
      # hp, wp = h, w  # height, width previous for next iteration
@@ -771,7 +771,7 @@ class Mosaic(BaseMixTransform):
      c = s - w, s + h0 - hp - h, s, s + h0 - hp
      padw, padh = c[:2]
-     x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords
+     x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coordinates
      # Image
      img9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img9[ymin:ymax, xmin:xmax]
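A minimal, self-contained sketch of the clamping idiom in the two Mosaic hunks above (toy sizes and a toy canvas, not the library's code): placement coordinates for a tile can fall off the canvas, so they are clamped with max(x, 0) and the source image is cropped by the same amount before pasting.

```python
import numpy as np

s = 4                                                  # toy canvas cell size
canvas = np.zeros((3 * s, 3 * s), dtype=np.uint8)      # mosaic canvas
img = np.arange(s * s, dtype=np.uint8).reshape(s, s)   # tile to paste

c = (-2, 1, s - 2, 1 + s)                  # xmin, ymin, xmax, ymax; xmin falls off-canvas
padw, padh = c[:2]
x1, y1, x2, y2 = (max(x, 0) for x in c)    # allocate coordinates, clamped to the canvas
canvas[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:]  # crop the source by the clipped amount
print(canvas[y1:y2, x1:x2])                # only the visible right-hand strip of the tile
```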
@@ -1283,7 +1283,7 @@ class RandomPerspective:
          eps (float): Small epsilon value to prevent division by zero.

      Returns:
-         (numpy.ndarray): Boolean array of shape (n,) indicating which boxes are candidates.
+         (numpy.ndarray): Boolean array of shape (n) indicating which boxes are candidates.
              True values correspond to boxes that meet all criteria.

      Examples:
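The criteria behind that boolean array can be hard to picture from the docstring fragment alone; below is a rough standalone sketch of a YOLO-style box-candidate filter (threshold values are illustrative, not necessarily the library's defaults): a box survives augmentation only if it stays wide and tall enough, keeps enough of its original area, and avoids extreme aspect ratios.

```python
import numpy as np


def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
    """box1/box2: (4, n) arrays of [x1, y1, x2, y2] before/after augmentation."""
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)


before = np.array([[0, 0, 100, 100], [0, 0, 50, 50]], dtype=float).T  # (4, n)
after = np.array([[0, 0, 90, 95], [0, 0, 3, 1]], dtype=float).T       # second box nearly collapsed
print(box_candidates(before, after))  # [ True False]
```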
@@ -1320,7 +1320,7 @@ class RandomHSV:
          >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
          >>> image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
          >>> labels = {"img": image}
-         >>> augmented_labels = augmenter(labels)
+         >>> augmenter(labels)
          >>> augmented_image = augmented_labels["img"]
          """
@@ -1337,7 +1337,7 @@ class RandomHSV:
          Examples:
              >>> hsv_aug = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
-             >>> augmented_image = hsv_aug(image)
+             >>> hsv_aug(image)
          """
          self.hgain = hgain
          self.sgain = sgain
@@ -1419,7 +1419,7 @@ class RandomFlip:
          Examples:
              >>> flip = RandomFlip(p=0.5, direction="horizontal")
-             >>> flip = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
+             >>> flip_with_idx = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
          """
          assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}"
          assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."
@@ -2022,7 +2022,7 @@ class Format:
      Returns:
          (Dict): A dictionary with formatted data, including:
              - 'img': Formatted image tensor.
-             - 'cls': Class labels tensor.
+             - 'cls': Class label's tensor.
              - 'bboxes': Bounding boxes tensor in the specified format.
              - 'masks': Instance masks tensor (if return_mask is True).
              - 'keypoints': Keypoints tensor (if return_keypoint is True).
@@ -241,7 +241,7 @@ def convert_coco(
      ```python
      from ultralytics.data.converter import convert_coco

-     convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=True)
+     convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
      convert_coco("../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
      ```
@@ -67,7 +67,7 @@ def load_yolo_dota(data_root, split="train"):
      Args:
          data_root (str): Data root.
-         split (str): The split data set, could be train or val.
+         split (str): The split data set, could be `train` or `val`.

      Notes:
          The directory structure assumed for the DOTA dataset:
@@ -76,7 +76,7 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
  def generate_crop_boxes(
      im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
  ) -> Tuple[List[List[int]], List[int]]:
-     """Generates crop boxes of varying sizes for multi-scale image processing, with layered overlapping regions."""
+     """Generates crop boxes of varying sizes for multiscale image processing, with layered overlapping regions."""
      crop_boxes, layer_idxs = [], []
      im_h, im_w = im_size
      short_side = min(im_h, im_w)
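A hedged usage sketch of the function touched above (the ultralytics.models.sam.amg import path is assumed): crop boxes are [x0, y0, x1, y1] lists, with layer 0 covering the full image and deeper layers tiling it into overlapping crops.

```python
from ultralytics.models.sam.amg import generate_crop_boxes  # import path assumed

crop_boxes, layer_idxs = generate_crop_boxes(im_size=(1080, 1920), n_layers=2, overlap_ratio=512 / 1500)
print(crop_boxes[0])    # [0, 0, 1920, 1080] -- layer 0 is the full image
print(set(layer_idxs))  # {0, 1, 2} -- one entry per crop box, grouped by layer
```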
@@ -502,11 +502,11 @@ def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) -> torch.T
  class MultiScaleAttention(nn.Module):
      """
-     Implements multi-scale self-attention with optional query pooling for efficient feature extraction.
+     Implements multiscale self-attention with optional query pooling for efficient feature extraction.

-     This class provides a flexible implementation of multi-scale attention, allowing for optional
+     This class provides a flexible implementation of multiscale attention, allowing for optional
      downsampling of query features through pooling. It's designed to enhance the model's ability to
-     capture multi-scale information in visual tasks.
+     capture multiscale information in visual tasks.

      Attributes:
          dim (int): Input dimension of the feature map.
@@ -518,7 +518,7 @@ class MultiScaleAttention(nn.Module):
          proj (nn.Linear): Output projection.

      Methods:
-         forward: Applies multi-scale attention to the input tensor.
+         forward: Applies multiscale attention to the input tensor.

      Examples:
          >>> import torch
@@ -537,7 +537,7 @@ class MultiScaleAttention(nn.Module):
          num_heads: int,
          q_pool: nn.Module = None,
      ):
-         """Initializes multi-scale attention with optional query pooling for efficient feature extraction."""
+         """Initializes multiscale attention with optional query pooling for efficient feature extraction."""
          super().__init__()
          self.dim = dim
@@ -552,7 +552,7 @@ class MultiScaleAttention(nn.Module):
          self.proj = nn.Linear(dim_out, dim_out)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """Applies multi-scale attention with optional query pooling to extract multi-scale features."""
+         """Applies multiscale attention with optional query pooling to extract multiscale features."""
          B, H, W, _ = x.shape
          # qkv with shape (B, H * W, 3, nHead, C)
          qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1)
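A hedged usage sketch of the class documented above (import path and output shapes are assumptions, inferred from the forward() code shown, which unpacks B, H, W, _ = x.shape): input is channels-last, and an optional q_pool module downsamples only the queries, so the output spatial size follows the pooled queries.

```python
import torch
import torch.nn as nn

from ultralytics.models.sam.modules.blocks import MultiScaleAttention  # import path assumed

x = torch.randn(1, 32, 32, 256)  # (B, H, W, C), channels-last

attn = MultiScaleAttention(dim=256, dim_out=256, num_heads=8)  # no query pooling
print(attn(x).shape)  # expected: torch.Size([1, 32, 32, 256])

pooled = MultiScaleAttention(dim=256, dim_out=256, num_heads=8, q_pool=nn.MaxPool2d(kernel_size=2, stride=2))
print(pooled(x).shape)  # expected: torch.Size([1, 16, 16, 256]), queries pooled 2x
```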
@@ -582,9 +582,9 @@ class MultiScaleAttention(nn.Module):
  class MultiScaleBlock(nn.Module):
      """
-     A multi-scale attention block with window partitioning and query pooling for efficient vision transformers.
+     A multiscale attention block with window partitioning and query pooling for efficient vision transformers.

-     This class implements a multi-scale attention mechanism with optional window partitioning and downsampling,
+     This class implements a multiscale attention mechanism with optional window partitioning and downsampling,
      designed for use in vision transformer architectures.

      Attributes:
@@ -601,7 +601,7 @@ class MultiScaleBlock(nn.Module):
          proj (nn.Linear | None): Projection layer for dimension mismatch.

      Methods:
-         forward: Processes input tensor through the multi-scale block.
+         forward: Processes input tensor through the multiscale block.

      Examples:
          >>> block = MultiScaleBlock(dim=256, dim_out=512, num_heads=8, window_size=7)
@@ -623,7 +623,7 @@ class MultiScaleBlock(nn.Module):
          act_layer: nn.Module = nn.GELU,
          window_size: int = 0,
      ):
-         """Initializes a multi-scale attention block with window partitioning and optional query pooling."""
+         """Initializes a multiscale attention block with window partitioning and optional query pooling."""
          super().__init__()
          if isinstance(norm_layer, str):
@@ -660,7 +660,7 @@ class MultiScaleBlock(nn.Module):
          self.proj = nn.Linear(dim, dim_out)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """Processes input through multi-scale attention and MLP, with optional windowing and downsampling."""
+         """Processes input through multiscale attention and MLP, with optional windowing and downsampling."""
          shortcut = x  # B, H, W, C
          x = self.norm1(x)
@@ -425,7 +425,7 @@ class SAM2Model(torch.nn.Module):
          low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
          high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
          obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
-         object_score_logits: Tensor of shape (B,) with object score logits.
+         object_score_logits: Tensor of shape (B) with object score logits.

          Where M is 3 if multimask_output=True, and 1 if multimask_output=False.
@@ -643,7 +643,7 @@ class SAM2Model(torch.nn.Module):
      if not is_init_cond_frame:
          # Retrieve the memories encoded with the maskmem backbone
          to_cat_memory, to_cat_memory_pos_embed = [], []
-         # Add conditioning frames's output first (all cond frames have t_pos=0 for
+         # Add conditioning frame's output first (all cond frames have t_pos=0 for
          # when getting temporal positional embedding below)
          assert len(output_dict["cond_frame_outputs"]) > 0
          # Select a maximum number of temporally closest cond frames for cross attention
@@ -1096,7 +1096,7 @@ class SAM2VideoPredictor(SAM2Predictor):
      # to `propagate_in_video_preflight`).
      consolidated_frame_inds = self.inference_state["consolidated_frame_inds"]
      for is_cond in {False, True}:
-         # Separately consolidate conditioning and non-conditioning temp outptus
+         # Separately consolidate conditioning and non-conditioning temp outputs
          storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
          # Find all the frames that contain temporary outputs for any objects
          # (these should be the frames that have just received clicks for mask inputs
@@ -1161,36 +1161,35 @@ class SAM2VideoPredictor(SAM2Predictor):
      assert predictor.dataset is not None
      assert predictor.dataset.mode == "video"

-     inference_state = {}
-     inference_state["num_frames"] = predictor.dataset.frames
-     # inputs on each frame
-     inference_state["point_inputs_per_obj"] = {}
-     inference_state["mask_inputs_per_obj"] = {}
-     # values that don't change across frames (so we only need to hold one copy of them)
-     inference_state["constants"] = {}
-     # mapping between client-side object id and model-side object index
-     inference_state["obj_id_to_idx"] = OrderedDict()
-     inference_state["obj_idx_to_id"] = OrderedDict()
-     inference_state["obj_ids"] = []
-     # A storage to hold the model's tracking results and states on each frame
-     inference_state["output_dict"] = {
-         "cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
-         "non_cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
+     inference_state = {
+         "num_frames": predictor.dataset.frames,
+         "point_inputs_per_obj": {},  # inputs points on each frame
+         "mask_inputs_per_obj": {},  # inputs mask on each frame
+         "constants": {},  # values that don't change across frames (so we only need to hold one copy of them)
+         # mapping between client-side object id and model-side object index
+         "obj_id_to_idx": OrderedDict(),
+         "obj_idx_to_id": OrderedDict(),
+         "obj_ids": [],
+         # A storage to hold the model's tracking results and states on each frame
+         "output_dict": {
+             "cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
+             "non_cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
+         },
+         # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
+         "output_dict_per_obj": {},
+         # A temporary storage to hold new outputs when user interact with a frame
+         # to add clicks or mask (it's merged into "output_dict" before propagation starts)
+         "temp_output_dict_per_obj": {},
+         # Frames that already holds consolidated outputs from click or mask inputs
+         # (we directly use their consolidated outputs during tracking)
+         "consolidated_frame_inds": {
+             "cond_frame_outputs": set(),  # set containing frame indices
+             "non_cond_frame_outputs": set(),  # set containing frame indices
+         },
+         # metadata for each tracking frame (e.g. which direction it's tracked)
+         "tracking_has_started": False,
+         "frames_already_tracked": [],
+     }
-     # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
-     inference_state["output_dict_per_obj"] = {}
-     # A temporary storage to hold new outputs when user interact with a frame
-     # to add clicks or mask (it's merged into "output_dict" before propagation starts)
-     inference_state["temp_output_dict_per_obj"] = {}
-     # Frames that already holds consolidated outputs from click or mask inputs
-     # (we directly use their consolidated outputs during tracking)
-     inference_state["consolidated_frame_inds"] = {
-         "cond_frame_outputs": set(),  # set containing frame indices
-         "non_cond_frame_outputs": set(),  # set containing frame indices
-     }
-     # metadata for each tracking frame (e.g. which direction it's tracked)
-     inference_state["tracking_has_started"] = False
-     inference_state["frames_already_tracked"] = []
      predictor.inference_state = inference_state

  def get_im_features(self, im, batch=1):
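A generic illustration of the refactor above, with keys borrowed from the diff but toy values: building the state as one dict literal declares everything in a single place and keeps each comment next to its entry, instead of mutating an empty dict key by key.

```python
from collections import OrderedDict

# Before: incremental assignment onto an empty dict.
inference_state = {}
inference_state["num_frames"] = 10
inference_state["obj_id_to_idx"] = OrderedDict()
inference_state["tracking_has_started"] = False

# After: a single literal with inline comments, producing the same mapping.
inference_state = {
    "num_frames": 10,                # toy value; the real code uses predictor.dataset.frames
    "obj_id_to_idx": OrderedDict(),  # client-side id -> model-side index
    "tracking_has_started": False,
}
```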
@@ -26,9 +26,9 @@ class GMC:
      Methods:
          __init__: Initializes a GMC object with the specified method and downscale factor.
          apply: Applies the chosen method to a raw frame and optionally uses provided detections.
-         applyEcc: Applies the ECC algorithm to a raw frame.
-         applyFeatures: Applies feature-based methods like ORB or SIFT to a raw frame.
-         applySparseOptFlow: Applies the Sparse Optical Flow method to a raw frame.
+         apply_ecc: Applies the ECC algorithm to a raw frame.
+         apply_features: Applies feature-based methods like ORB or SIFT to a raw frame.
+         apply_sparseoptflow: Applies the Sparse Optical Flow method to a raw frame.
          reset_params: Resets the internal parameters of the GMC object.

      Examples:
@@ -108,15 +108,15 @@ class GMC:
          (480, 640, 3)
          """
          if self.method in {"orb", "sift"}:
-             return self.applyFeatures(raw_frame, detections)
+             return self.apply_features(raw_frame, detections)
          elif self.method == "ecc":
-             return self.applyEcc(raw_frame)
+             return self.apply_ecc(raw_frame)
          elif self.method == "sparseOptFlow":
-             return self.applySparseOptFlow(raw_frame)
+             return self.apply_sparseoptflow(raw_frame)
          else:
              return np.eye(2, 3)

-     def applyEcc(self, raw_frame: np.array) -> np.array:
+     def apply_ecc(self, raw_frame: np.array) -> np.array:
          """
          Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation.
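A hedged usage sketch of the renamed GMC API (the ultralytics.trackers.utils.gmc import path is assumed): the public apply() entry point dispatches to the snake_case methods introduced above, and each returns a 2x3 affine warp matrix, identity on the first frame.

```python
import numpy as np

from ultralytics.trackers.utils.gmc import GMC  # import path assumed

frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

gmc = GMC(method="sparseOptFlow")
warp = gmc.apply(frame)    # dispatches to apply_sparseoptflow() after this change
print(warp.shape)          # (2, 3)

ecc = GMC(method="ecc")
print(ecc.apply_ecc(frame))  # the renamed methods can also be called directly
```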
@@ -128,7 +128,7 @@ class GMC:
          Examples:
              >>> gmc = GMC(method="ecc")
-             >>> processed_frame = gmc.applyEcc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+             >>> processed_frame = gmc.apply_ecc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
              >>> print(processed_frame)
              [[1. 0. 0.]
               [0. 1. 0.]]
@@ -161,7 +161,7 @@ class GMC:
          return H

-     def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.array:
+     def apply_features(self, raw_frame: np.array, detections: list = None) -> np.array:
          """
          Apply feature-based methods like ORB or SIFT to a raw frame.
@@ -175,7 +175,7 @@ class GMC:
          Examples:
              >>> gmc = GMC(method="orb")
              >>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
-             >>> processed_frame = gmc.applyFeatures(raw_frame)
+             >>> processed_frame = gmc.apply_features(raw_frame)
              >>> print(processed_frame.shape)
              (2, 3)
          """
@@ -304,7 +304,7 @@ class GMC:
          return H

-     def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
+     def apply_sparseoptflow(self, raw_frame: np.array) -> np.array:
          """
          Apply Sparse Optical Flow method to a raw frame.
@@ -316,7 +316,7 @@ class GMC:
          Examples:
              >>> gmc = GMC()
-             >>> result = gmc.applySparseOptFlow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+             >>> result = gmc.apply_sparseoptflow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
              >>> print(result)
              [[1. 0. 0.]
               [0. 1. 0.]]
@@ -270,7 +270,7 @@ def batch_probiou(obb1, obb2, eps=1e-7):
      return 1 - hd


- def smooth_BCE(eps=0.1):
+ def smooth_bce(eps=0.1):
      """
      Computes smoothed positive and negative Binary Cross-Entropy targets.
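For reference, a minimal sketch of the label-smoothing rule this helper implements (written standalone here, following the YOLOv5-style smooth_BCE; not imported from the library): positive targets shrink toward 1 - eps/2 and negative targets grow toward eps/2.

```python
def smooth_bce(eps: float = 0.1):
    """Return smoothed (positive, negative) BCE targets for label smoothing."""
    return 1.0 - 0.5 * eps, 0.5 * eps


cp, cn = smooth_bce(eps=0.1)
print(cp, cn)  # 0.95 0.05
```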