Add docformatter to pre-commit (#5279)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
This commit is contained in:
parent
c7aa83da31
commit
7517667a33
90 changed files with 1396 additions and 497 deletions
|
|
@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap
|
|||
|
||||
# TODO: we might need a BaseTransform to make all these augments compatible with both classification and semantic segmentation
|
||||
class BaseTransform:
|
||||
"""
|
||||
Base class for image transformations.
|
||||
|
||||
This is a generic transformation class that can be extended for specific image processing needs.
|
||||
The class is designed to be compatible with both classification and semantic segmentation tasks.
|
||||
|
||||
Methods:
|
||||
__init__: Initializes the BaseTransform object.
|
||||
apply_image: Applies image transformation to labels.
|
||||
apply_instances: Applies transformations to object instances in labels.
|
||||
apply_semantic: Applies semantic segmentation to an image.
|
||||
__call__: Applies all label transformations to an image, instances, and semantic masks.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initializes the BaseTransform object."""
|
||||
pass
|
||||
|
||||
def apply_image(self, labels):
|
||||
"""Applies image transformation to labels."""
|
||||
"""Applies image transformations to labels."""
|
||||
pass
|
||||
|
||||
def apply_instances(self, labels):
|
||||
"""Applies transformations to input 'labels' and returns object instances."""
|
||||
"""Applies transformations to object instances in labels."""
|
||||
pass
|
||||
|
||||
def apply_semantic(self, labels):
|
||||
|
|
@ -37,13 +51,14 @@ class BaseTransform:
|
|||
pass
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Applies label transformations to an image, instances and semantic masks."""
|
||||
"""Applies all label transformations to an image, instances, and semantic masks."""
|
||||
self.apply_image(labels)
|
||||
self.apply_instances(labels)
|
||||
self.apply_semantic(labels)
|
||||
|
||||
|
||||
class Compose:
|
||||
"""Class for composing multiple image transformations."""
|
||||
|
||||
def __init__(self, transforms):
|
||||
"""Initializes the Compose object with a list of transforms."""
|
||||
|
|
@ -60,18 +75,23 @@ class Compose:
|
|||
self.transforms.append(transform)
|
||||
|
||||
def tolist(self):
|
||||
"""Converts list of transforms to a standard Python list."""
|
||||
"""Converts the list of transforms to a standard Python list."""
|
||||
return self.transforms
|
||||
|
||||
def __repr__(self):
|
||||
"""Return string representation of object."""
|
||||
"""Returns a string representation of the object."""
|
||||
return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
|
||||
|
||||
|
||||
class BaseMixTransform:
|
||||
"""This implementation is from mmyolo."""
|
||||
"""
|
||||
Class for base mix (MixUp/Mosaic) transformations.
|
||||
|
||||
This implementation is from mmyolo.
|
||||
"""
|
||||
|
||||
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
||||
"""Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
|
||||
self.dataset = dataset
|
||||
self.pre_transform = pre_transform
|
||||
self.p = p
|
||||
|
|
@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):
|
|||
|
||||
|
||||
class MixUp(BaseMixTransform):
|
||||
"""Class for applying MixUp augmentation to the dataset."""
|
||||
|
||||
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
||||
"""Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
|
||||
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
||||
|
||||
def get_indexes(self):
|
||||
|
|
@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
|
|||
return random.randint(0, len(self.dataset) - 1)
|
||||
|
||||
def _mix_transform(self, labels):
|
||||
"""Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
"""Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
labels2 = labels['mix_labels'][0]
|
||||
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
|
||||
|
|
@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):
|
|||
|
||||
|
||||
class RandomPerspective:
|
||||
"""
|
||||
Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
|
||||
keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
|
||||
option to apply these transformations conditionally with a specified probability.
|
||||
|
||||
Attributes:
|
||||
degrees (float): Degree range for random rotations.
|
||||
translate (float): Fraction of total width and height for random translation.
|
||||
scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
|
||||
shear (float): Shear intensity (angle in degrees).
|
||||
perspective (float): Perspective distortion factor.
|
||||
border (tuple): Tuple specifying mosaic border.
|
||||
pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
|
||||
|
||||
Methods:
|
||||
affine_transform(img, border): Applies a series of affine transformations to the image.
|
||||
apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
|
||||
apply_segments(segments, M): Transforms segments and generates new bounding boxes.
|
||||
apply_keypoints(keypoints, M): Transforms keypoints.
|
||||
__call__(labels): Main method to apply transformations to both images and their corresponding annotations.
|
||||
box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
degrees=0.0,
|
||||
|
|
@ -290,17 +334,31 @@ class RandomPerspective:
|
|||
perspective=0.0,
|
||||
border=(0, 0),
|
||||
pre_transform=None):
|
||||
"""Initializes RandomPerspective object with transformation parameters."""
|
||||
|
||||
self.degrees = degrees
|
||||
self.translate = translate
|
||||
self.scale = scale
|
||||
self.shear = shear
|
||||
self.perspective = perspective
|
||||
# Mosaic border
|
||||
self.border = border
|
||||
self.border = border # mosaic border
|
||||
self.pre_transform = pre_transform
|
||||
|
||||
def affine_transform(self, img, border):
|
||||
"""Center."""
|
||||
"""
|
||||
Applies a sequence of affine transformations centered around the image center.
|
||||
|
||||
Args:
|
||||
img (ndarray): Input image.
|
||||
border (tuple): Border dimensions.
|
||||
|
||||
Returns:
|
||||
img (ndarray): Transformed image.
|
||||
M (ndarray): Transformation matrix.
|
||||
s (float): Scale factor.
|
||||
"""
|
||||
|
||||
# Center
|
||||
C = np.eye(3, dtype=np.float32)
|
||||
|
||||
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
|
||||
|
|
@ -462,8 +520,22 @@ class RandomPerspective:
|
|||
labels['resized_shape'] = img.shape[:2]
|
||||
return labels
|
||||
|
||||
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
|
||||
# Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
|
||||
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
|
||||
"""
|
||||
Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
|
||||
before and after augmentation to decide whether a box is a candidate for further processing.
|
||||
|
||||
Args:
|
||||
box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
|
||||
box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
|
||||
wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
|
||||
ar_thr (float, optional): The aspect ratio threshold. Default is 100.
|
||||
area_thr (float, optional): The area ratio threshold. Default is 0.1.
|
||||
eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
|
||||
"""
|
||||
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
|
||||
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
|
||||
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
|
||||
|
|
@ -471,14 +543,32 @@ class RandomPerspective:
|
|||
|
||||
|
||||
class RandomHSV:
|
||||
"""
|
||||
This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
|
||||
image.
|
||||
|
||||
The adjustments are random but within limits set by hgain, sgain, and vgain.
|
||||
"""
|
||||
|
||||
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
|
||||
"""
|
||||
Initialize RandomHSV class with gains for each HSV channel.
|
||||
|
||||
Args:
|
||||
hgain (float, optional): Maximum variation for hue. Default is 0.5.
|
||||
sgain (float, optional): Maximum variation for saturation. Default is 0.5.
|
||||
vgain (float, optional): Maximum variation for value. Default is 0.5.
|
||||
"""
|
||||
self.hgain = hgain
|
||||
self.sgain = sgain
|
||||
self.vgain = vgain
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Applies image HSV augmentation"""
|
||||
"""
|
||||
Applies random HSV augmentation to an image within the predefined limits.
|
||||
|
||||
The modified image replaces the original image in the input 'labels' dict.
|
||||
"""
|
||||
img = labels['img']
|
||||
if self.hgain or self.sgain or self.vgain:
|
||||
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
|
||||
|
|
@ -496,9 +586,22 @@ class RandomHSV:
|
|||
|
||||
|
||||
class RandomFlip:
|
||||
"""Applies random horizontal or vertical flip to an image with a given probability."""
|
||||
"""
|
||||
Applies a random horizontal or vertical flip to an image with a given probability.
|
||||
|
||||
Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
|
||||
"""
|
||||
|
||||
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
|
||||
"""
|
||||
Initializes the RandomFlip class with probability and direction.
|
||||
|
||||
Args:
|
||||
p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
|
||||
direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
|
||||
Default is 'horizontal'.
|
||||
flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
|
||||
"""
|
||||
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
|
||||
assert 0 <= p <= 1.0
|
||||
|
||||
|
|
@ -507,7 +610,16 @@ class RandomFlip:
|
|||
self.flip_idx = flip_idx
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Resize image and padding for detection, instance segmentation, pose."""
|
||||
"""
|
||||
Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
|
||||
|
||||
Args:
|
||||
labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
|
||||
'instances' is an object containing bounding boxes and optionally keypoints.
|
||||
|
||||
Returns:
|
||||
(dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
|
||||
"""
|
||||
img = labels['img']
|
||||
instances = labels.pop('instances')
|
||||
instances.convert_bbox(format='xywh')
|
||||
|
|
@ -599,12 +711,38 @@ class LetterBox:
|
|||
|
||||
|
||||
class CopyPaste:
|
||||
"""
|
||||
Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
|
||||
responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
|
||||
"""
|
||||
|
||||
def __init__(self, p=0.5) -> None:
|
||||
"""
|
||||
Initializes the CopyPaste class with a given probability.
|
||||
|
||||
Args:
|
||||
p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
|
||||
Default is 0.5.
|
||||
"""
|
||||
self.p = p
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
|
||||
"""
|
||||
Applies the Copy-Paste augmentation to the given image and instances.
|
||||
|
||||
Args:
|
||||
labels (dict): A dictionary containing:
|
||||
- 'img': The image to augment.
|
||||
- 'cls': Class labels associated with the instances.
|
||||
- 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
|
||||
|
||||
Returns:
|
||||
(dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
|
||||
|
||||
Notes:
|
||||
1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
|
||||
2. This method modifies the input dictionary 'labels' in place.
|
||||
"""
|
||||
im = labels['img']
|
||||
cls = labels['cls']
|
||||
h, w = im.shape[:2]
|
||||
|
|
@ -639,9 +777,13 @@ class CopyPaste:
|
|||
|
||||
|
||||
class Albumentations:
|
||||
"""Albumentations transformations. Optional, uninstall package to disable.
|
||||
Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
|
||||
random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
|
||||
"""
|
||||
Albumentations transformations.
|
||||
|
||||
Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
|
||||
Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
|
||||
compression.
|
||||
"""
|
||||
|
||||
def __init__(self, p=1.0):
|
||||
"""Initialize the transform object for YOLO bbox formatted params."""
|
||||
|
|
@ -690,6 +832,19 @@ class Albumentations:
|
|||
|
||||
# TODO: technically this is not an augmentation; maybe we should move it to another file
|
||||
class Format:
|
||||
"""
|
||||
Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
|
||||
standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
|
||||
|
||||
Attributes:
|
||||
bbox_format (str): Format for bounding boxes. Default is 'xywh'.
|
||||
normalize (bool): Whether to normalize bounding boxes. Default is True.
|
||||
return_mask (bool): Return instance masks for segmentation. Default is False.
|
||||
return_keypoint (bool): Return keypoints for pose estimation. Default is False.
|
||||
mask_ratio (int): Downsample ratio for masks. Default is 4.
|
||||
mask_overlap (bool): Whether to overlap masks. Default is True.
|
||||
batch_idx (bool): Keep batch indexes. Default is True.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
bbox_format='xywh',
|
||||
|
|
@ -699,6 +854,7 @@ class Format:
|
|||
mask_ratio=4,
|
||||
mask_overlap=True,
|
||||
batch_idx=True):
|
||||
"""Initializes the Format class with given parameters."""
|
||||
self.bbox_format = bbox_format
|
||||
self.normalize = normalize
|
||||
self.return_mask = return_mask # set False when training detection only
|
||||
|
|
@ -746,7 +902,7 @@ class Format:
|
|||
return img
|
||||
|
||||
def _format_segments(self, instances, cls, w, h):
|
||||
"""convert polygon points to bitmap."""
|
||||
"""Convert polygon points to bitmap."""
|
||||
segments = instances.segments
|
||||
if self.mask_overlap:
|
||||
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
|
||||
|
|
@ -851,35 +1007,75 @@ def classify_albumentations(
|
|||
|
||||
|
||||
class ClassifyLetterBox:
|
||||
"""YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
|
||||
"""
|
||||
YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
|
||||
T.Compose([LetterBox(size), ToTensor()]).
|
||||
|
||||
Attributes:
|
||||
h (int): Target height of the image.
|
||||
w (int): Target width of the image.
|
||||
auto (bool): If True, automatically solves for short side using stride.
|
||||
stride (int): The stride value, used when 'auto' is True.
|
||||
"""
|
||||
|
||||
def __init__(self, size=(640, 640), auto=False, stride=32):
|
||||
"""Resizes image and crops it to center with max dimensions 'h' and 'w'."""
|
||||
"""
|
||||
Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
|
||||
|
||||
Args:
|
||||
size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
|
||||
auto (bool): If True, automatically calculates the short side based on stride.
|
||||
stride (int): The stride value, used when 'auto' is True.
|
||||
"""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
self.auto = auto # pass max size integer, automatically solve for short side using stride
|
||||
self.stride = stride # used with auto
|
||||
|
||||
def __call__(self, im): # im = np.array HWC
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Resizes the image and pads it with a letterbox method.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): The input image as a numpy array of shape HWC.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): The letterboxed and resized image as a numpy array.
|
||||
"""
|
||||
imh, imw = im.shape[:2]
|
||||
r = min(self.h / imh, self.w / imw) # ratio of new/old
|
||||
h, w = round(imh * r), round(imw * r) # resized image
|
||||
r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions
|
||||
h, w = round(imh * r), round(imw * r) # resized image dimensions
|
||||
|
||||
# Calculate padding dimensions
|
||||
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
|
||||
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
|
||||
|
||||
# Create padded image
|
||||
im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
|
||||
im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
|
||||
return im_out
|
||||
|
||||
|
||||
class CenterCrop:
|
||||
"""YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
|
||||
"""YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
|
||||
T.Compose([CenterCrop(size), ToTensor()]).
|
||||
"""
|
||||
|
||||
def __init__(self, size=640):
|
||||
"""Converts an image from numpy array to PyTorch tensor."""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
|
||||
def __call__(self, im): # im = np.array HWC
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Crops the center of the image and resizes it.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): The input image as a numpy array of shape HWC.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): The center-cropped and resized image as a numpy array.
|
||||
"""
|
||||
imh, imw = im.shape[:2]
|
||||
m = min(imh, imw) # min dimension
|
||||
top, left = (imh - m) // 2, (imw - m) // 2
|
||||
|
|
@ -887,14 +1083,23 @@ class CenterCrop:
|
|||
|
||||
|
||||
class ToTensor:
|
||||
"""YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
|
||||
"""YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
|
||||
|
||||
def __init__(self, half=False):
|
||||
"""Initialize YOLOv8 ToTensor object with optional half-precision support."""
|
||||
super().__init__()
|
||||
self.half = half
|
||||
|
||||
def __call__(self, im): # im = np.array HWC in BGR order
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
|
||||
"""
|
||||
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
|
||||
im = torch.from_numpy(im) # to torch
|
||||
im = im.half() if self.half else im.float() # uint8 to fp16/32
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue