Add docformatter to pre-commit (#5279)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
This commit is contained in:
parent
c7aa83da31
commit
7517667a33
90 changed files with 1396 additions and 497 deletions
|
|
@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap
|
|||
|
||||
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
|
||||
class BaseTransform:
|
||||
"""
|
||||
Base class for image transformations.
|
||||
|
||||
This is a generic transformation class that can be extended for specific image processing needs.
|
||||
The class is designed to be compatible with both classification and semantic segmentation tasks.
|
||||
|
||||
Methods:
|
||||
__init__: Initializes the BaseTransform object.
|
||||
apply_image: Applies image transformation to labels.
|
||||
apply_instances: Applies transformations to object instances in labels.
|
||||
apply_semantic: Applies semantic segmentation to an image.
|
||||
__call__: Applies all label transformations to an image, instances, and semantic masks.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initializes the BaseTransform object."""
|
||||
pass
|
||||
|
||||
def apply_image(self, labels):
|
||||
"""Applies image transformation to labels."""
|
||||
"""Applies image transformations to labels."""
|
||||
pass
|
||||
|
||||
def apply_instances(self, labels):
|
||||
"""Applies transformations to input 'labels' and returns object instances."""
|
||||
"""Applies transformations to object instances in labels."""
|
||||
pass
|
||||
|
||||
def apply_semantic(self, labels):
|
||||
|
|
@ -37,13 +51,14 @@ class BaseTransform:
|
|||
pass
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Applies label transformations to an image, instances and semantic masks."""
|
||||
"""Applies all label transformations to an image, instances, and semantic masks."""
|
||||
self.apply_image(labels)
|
||||
self.apply_instances(labels)
|
||||
self.apply_semantic(labels)
|
||||
|
||||
|
||||
class Compose:
|
||||
"""Class for composing multiple image transformations."""
|
||||
|
||||
def __init__(self, transforms):
|
||||
"""Initializes the Compose object with a list of transforms."""
|
||||
|
|
@ -60,18 +75,23 @@ class Compose:
|
|||
self.transforms.append(transform)
|
||||
|
||||
def tolist(self):
|
||||
"""Converts list of transforms to a standard Python list."""
|
||||
"""Converts the list of transforms to a standard Python list."""
|
||||
return self.transforms
|
||||
|
||||
def __repr__(self):
|
||||
"""Return string representation of object."""
|
||||
"""Returns a string representation of the object."""
|
||||
return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
|
||||
|
||||
|
||||
class BaseMixTransform:
|
||||
"""This implementation is from mmyolo."""
|
||||
"""
|
||||
Class for base mix (MixUp/Mosaic) transformations.
|
||||
|
||||
This implementation is from mmyolo.
|
||||
"""
|
||||
|
||||
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
||||
"""Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
|
||||
self.dataset = dataset
|
||||
self.pre_transform = pre_transform
|
||||
self.p = p
|
||||
|
|
@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):
|
|||
|
||||
|
||||
class MixUp(BaseMixTransform):
|
||||
"""Class for applying MixUp augmentation to the dataset."""
|
||||
|
||||
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
||||
"""Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
|
||||
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
||||
|
||||
def get_indexes(self):
|
||||
|
|
@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
|
|||
return random.randint(0, len(self.dataset) - 1)
|
||||
|
||||
def _mix_transform(self, labels):
|
||||
"""Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
"""Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
labels2 = labels['mix_labels'][0]
|
||||
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
|
||||
|
|
@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):
|
|||
|
||||
|
||||
class RandomPerspective:
|
||||
"""
|
||||
Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
|
||||
keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
|
||||
option to apply these transformations conditionally with a specified probability.
|
||||
|
||||
Attributes:
|
||||
degrees (float): Degree range for random rotations.
|
||||
translate (float): Fraction of total width and height for random translation.
|
||||
scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
|
||||
shear (float): Shear intensity (angle in degrees).
|
||||
perspective (float): Perspective distortion factor.
|
||||
border (tuple): Tuple specifying mosaic border.
|
||||
pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
|
||||
|
||||
Methods:
|
||||
affine_transform(img, border): Applies a series of affine transformations to the image.
|
||||
apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
|
||||
apply_segments(segments, M): Transforms segments and generates new bounding boxes.
|
||||
apply_keypoints(keypoints, M): Transforms keypoints.
|
||||
__call__(labels): Main method to apply transformations to both images and their corresponding annotations.
|
||||
box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
degrees=0.0,
|
||||
|
|
@ -290,17 +334,31 @@ class RandomPerspective:
|
|||
perspective=0.0,
|
||||
border=(0, 0),
|
||||
pre_transform=None):
|
||||
"""Initializes RandomPerspective object with transformation parameters."""
|
||||
|
||||
self.degrees = degrees
|
||||
self.translate = translate
|
||||
self.scale = scale
|
||||
self.shear = shear
|
||||
self.perspective = perspective
|
||||
# Mosaic border
|
||||
self.border = border
|
||||
self.border = border # mosaic border
|
||||
self.pre_transform = pre_transform
|
||||
|
||||
def affine_transform(self, img, border):
|
||||
"""Center."""
|
||||
"""
|
||||
Applies a sequence of affine transformations centered around the image center.
|
||||
|
||||
Args:
|
||||
img (ndarray): Input image.
|
||||
border (tuple): Border dimensions.
|
||||
|
||||
Returns:
|
||||
img (ndarray): Transformed image.
|
||||
M (ndarray): Transformation matrix.
|
||||
s (float): Scale factor.
|
||||
"""
|
||||
|
||||
# Center
|
||||
C = np.eye(3, dtype=np.float32)
|
||||
|
||||
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
|
||||
|
|
@ -462,8 +520,22 @@ class RandomPerspective:
|
|||
labels['resized_shape'] = img.shape[:2]
|
||||
return labels
|
||||
|
||||
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
|
||||
# Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
|
||||
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
|
||||
"""
|
||||
Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
|
||||
before and after augmentation to decide whether a box is a candidate for further processing.
|
||||
|
||||
Args:
|
||||
box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
|
||||
box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
|
||||
wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
|
||||
ar_thr (float, optional): The aspect ratio threshold. Default is 100.
|
||||
area_thr (float, optional): The area ratio threshold. Default is 0.1.
|
||||
eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
|
||||
"""
|
||||
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
|
||||
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
|
||||
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
|
||||
|
|
@ -471,14 +543,32 @@ class RandomPerspective:
|
|||
|
||||
|
||||
class RandomHSV:
|
||||
"""
|
||||
This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
|
||||
image.
|
||||
|
||||
The adjustments are random but within limits set by hgain, sgain, and vgain.
|
||||
"""
|
||||
|
||||
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
|
||||
"""
|
||||
Initialize RandomHSV class with gains for each HSV channel.
|
||||
|
||||
Args:
|
||||
hgain (float, optional): Maximum variation for hue. Default is 0.5.
|
||||
sgain (float, optional): Maximum variation for saturation. Default is 0.5.
|
||||
vgain (float, optional): Maximum variation for value. Default is 0.5.
|
||||
"""
|
||||
self.hgain = hgain
|
||||
self.sgain = sgain
|
||||
self.vgain = vgain
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Applies image HSV augmentation"""
|
||||
"""
|
||||
Applies random HSV augmentation to an image within the predefined limits.
|
||||
|
||||
The modified image replaces the original image in the input 'labels' dict.
|
||||
"""
|
||||
img = labels['img']
|
||||
if self.hgain or self.sgain or self.vgain:
|
||||
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
|
||||
|
|
@ -496,9 +586,22 @@ class RandomHSV:
|
|||
|
||||
|
||||
class RandomFlip:
|
||||
"""Applies random horizontal or vertical flip to an image with a given probability."""
|
||||
"""
|
||||
Applies a random horizontal or vertical flip to an image with a given probability.
|
||||
|
||||
Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
|
||||
"""
|
||||
|
||||
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
|
||||
"""
|
||||
Initializes the RandomFlip class with probability and direction.
|
||||
|
||||
Args:
|
||||
p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
|
||||
direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
|
||||
Default is 'horizontal'.
|
||||
flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
|
||||
"""
|
||||
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
|
||||
assert 0 <= p <= 1.0
|
||||
|
||||
|
|
@ -507,7 +610,16 @@ class RandomFlip:
|
|||
self.flip_idx = flip_idx
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Resize image and padding for detection, instance segmentation, pose."""
|
||||
"""
|
||||
Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
|
||||
|
||||
Args:
|
||||
labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
|
||||
'instances' is an object containing bounding boxes and optionally keypoints.
|
||||
|
||||
Returns:
|
||||
(dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
|
||||
"""
|
||||
img = labels['img']
|
||||
instances = labels.pop('instances')
|
||||
instances.convert_bbox(format='xywh')
|
||||
|
|
@ -599,12 +711,38 @@ class LetterBox:
|
|||
|
||||
|
||||
class CopyPaste:
|
||||
"""
|
||||
Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
|
||||
responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
|
||||
"""
|
||||
|
||||
def __init__(self, p=0.5) -> None:
|
||||
"""
|
||||
Initializes the CopyPaste class with a given probability.
|
||||
|
||||
Args:
|
||||
p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
|
||||
Default is 0.5.
|
||||
"""
|
||||
self.p = p
|
||||
|
||||
def __call__(self, labels):
|
||||
"""Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
|
||||
"""
|
||||
Applies the Copy-Paste augmentation to the given image and instances.
|
||||
|
||||
Args:
|
||||
labels (dict): A dictionary containing:
|
||||
- 'img': The image to augment.
|
||||
- 'cls': Class labels associated with the instances.
|
||||
- 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
|
||||
|
||||
Returns:
|
||||
(dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
|
||||
|
||||
Notes:
|
||||
1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
|
||||
2. This method modifies the input dictionary 'labels' in place.
|
||||
"""
|
||||
im = labels['img']
|
||||
cls = labels['cls']
|
||||
h, w = im.shape[:2]
|
||||
|
|
@ -639,9 +777,13 @@ class CopyPaste:
|
|||
|
||||
|
||||
class Albumentations:
|
||||
"""Albumentations transformations. Optional, uninstall package to disable.
|
||||
Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
|
||||
random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
|
||||
"""
|
||||
Albumentations transformations.
|
||||
|
||||
Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
|
||||
Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
|
||||
compression.
|
||||
"""
|
||||
|
||||
def __init__(self, p=1.0):
|
||||
"""Initialize the transform object for YOLO bbox formatted params."""
|
||||
|
|
@ -690,6 +832,19 @@ class Albumentations:
|
|||
|
||||
# TODO: technically this is not an augmentation; maybe we should move this to another file
|
||||
class Format:
|
||||
"""
|
||||
Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
|
||||
standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
|
||||
|
||||
Attributes:
|
||||
bbox_format (str): Format for bounding boxes. Default is 'xywh'.
|
||||
normalize (bool): Whether to normalize bounding boxes. Default is True.
|
||||
return_mask (bool): Return instance masks for segmentation. Default is False.
|
||||
return_keypoint (bool): Return keypoints for pose estimation. Default is False.
|
||||
mask_ratio (int): Downsample ratio for masks. Default is 4.
|
||||
mask_overlap (bool): Whether to overlap masks. Default is True.
|
||||
batch_idx (bool): Keep batch indexes. Default is True.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
bbox_format='xywh',
|
||||
|
|
@ -699,6 +854,7 @@ class Format:
|
|||
mask_ratio=4,
|
||||
mask_overlap=True,
|
||||
batch_idx=True):
|
||||
"""Initializes the Format class with given parameters."""
|
||||
self.bbox_format = bbox_format
|
||||
self.normalize = normalize
|
||||
self.return_mask = return_mask # set False when training detection only
|
||||
|
|
@ -746,7 +902,7 @@ class Format:
|
|||
return img
|
||||
|
||||
def _format_segments(self, instances, cls, w, h):
|
||||
"""convert polygon points to bitmap."""
|
||||
"""Convert polygon points to bitmap."""
|
||||
segments = instances.segments
|
||||
if self.mask_overlap:
|
||||
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
|
||||
|
|
@ -851,35 +1007,75 @@ def classify_albumentations(
|
|||
|
||||
|
||||
class ClassifyLetterBox:
|
||||
"""YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
|
||||
"""
|
||||
YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
|
||||
T.Compose([LetterBox(size), ToTensor()]).
|
||||
|
||||
Attributes:
|
||||
h (int): Target height of the image.
|
||||
w (int): Target width of the image.
|
||||
auto (bool): If True, automatically solves for short side using stride.
|
||||
stride (int): The stride value, used when 'auto' is True.
|
||||
"""
|
||||
|
||||
def __init__(self, size=(640, 640), auto=False, stride=32):
|
||||
"""Resizes image and crops it to center with max dimensions 'h' and 'w'."""
|
||||
"""
|
||||
Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
|
||||
|
||||
Args:
|
||||
size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
|
||||
auto (bool): If True, automatically calculates the short side based on stride.
|
||||
stride (int): The stride value, used when 'auto' is True.
|
||||
"""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
self.auto = auto # pass max size integer, automatically solve for short side using stride
|
||||
self.stride = stride # used with auto
|
||||
|
||||
def __call__(self, im): # im = np.array HWC
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Resizes the image and pads it with a letterbox method.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): The input image as a numpy array of shape HWC.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): The letterboxed and resized image as a numpy array.
|
||||
"""
|
||||
imh, imw = im.shape[:2]
|
||||
r = min(self.h / imh, self.w / imw) # ratio of new/old
|
||||
h, w = round(imh * r), round(imw * r) # resized image
|
||||
r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions
|
||||
h, w = round(imh * r), round(imw * r) # resized image dimensions
|
||||
|
||||
# Calculate padding dimensions
|
||||
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
|
||||
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
|
||||
|
||||
# Create padded image
|
||||
im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
|
||||
im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
|
||||
return im_out
|
||||
|
||||
|
||||
class CenterCrop:
|
||||
"""YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
|
||||
"""YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
|
||||
T.Compose([CenterCrop(size), ToTensor()]).
|
||||
"""
|
||||
|
||||
def __init__(self, size=640):
|
||||
"""Converts an image from numpy array to PyTorch tensor."""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
|
||||
def __call__(self, im): # im = np.array HWC
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Crops the largest centered square from the image and resizes it to the target size.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): The input image as a numpy array of shape HWC.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): The center-cropped and resized image as a numpy array.
|
||||
"""
|
||||
imh, imw = im.shape[:2]
|
||||
m = min(imh, imw) # min dimension
|
||||
top, left = (imh - m) // 2, (imw - m) // 2
|
||||
|
|
@ -887,14 +1083,23 @@ class CenterCrop:
|
|||
|
||||
|
||||
class ToTensor:
|
||||
"""YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
|
||||
"""YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
|
||||
|
||||
def __init__(self, half=False):
|
||||
"""Initialize YOLOv8 ToTensor object with optional half-precision support."""
|
||||
super().__init__()
|
||||
self.half = half
|
||||
|
||||
def __call__(self, im): # im = np.array HWC in BGR order
|
||||
def __call__(self, im):
|
||||
"""
|
||||
Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
|
||||
|
||||
Args:
|
||||
im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
|
||||
"""
|
||||
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
|
||||
im = torch.from_numpy(im) # to torch
|
||||
im = im.half() if self.half else im.float() # uint8 to fp16/32
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ class BaseDataset(Dataset):
|
|||
classes=None,
|
||||
fraction=1.0):
|
||||
super().__init__()
|
||||
"""Initialize BaseDataset with given configuration and options."""
|
||||
self.img_path = img_path
|
||||
self.imgsz = imgsz
|
||||
self.augment = augment
|
||||
|
|
@ -256,7 +257,7 @@ class BaseDataset(Dataset):
|
|||
return len(self.labels)
|
||||
|
||||
def update_labels_info(self, label):
|
||||
"""custom your label format here."""
|
||||
"""Custom your label format here."""
|
||||
return label
|
||||
|
||||
def build_transforms(self, hyp=None):
|
||||
|
|
|
|||
|
|
@ -20,7 +20,11 @@ from .utils import PIN_MEMORY
|
|||
|
||||
|
||||
class InfiniteDataLoader(dataloader.DataLoader):
|
||||
"""Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
|
||||
"""
|
||||
Dataloader that reuses workers.
|
||||
|
||||
Uses same syntax as vanilla DataLoader.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""Dataloader that infinitely recycles workers, inherits from DataLoader."""
|
||||
|
|
@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
|
|||
yield next(self.iterator)
|
||||
|
||||
def reset(self):
|
||||
"""Reset iterator.
|
||||
"""
|
||||
Reset iterator.
|
||||
|
||||
This is useful when we want to modify settings of dataset while training.
|
||||
"""
|
||||
self.iterator = self._get_iterator()
|
||||
|
|
@ -70,7 +76,7 @@ def seed_worker(worker_id): # noqa
|
|||
|
||||
|
||||
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
|
||||
"""Build YOLO Dataset"""
|
||||
"""Build YOLO Dataset."""
|
||||
return YOLODataset(
|
||||
img_path=img_path,
|
||||
imgsz=cfg.imgsz,
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
|
|||
|
||||
|
||||
def coco91_to_coco80_class():
|
||||
"""Converts 91-index COCO class IDs to 80-index COCO class IDs.
|
||||
"""
|
||||
Converts 91-index COCO class IDs to 80-index COCO class IDs.
|
||||
|
||||
Returns:
|
||||
(list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
|
||||
|
|
@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
|
|||
use_segments=False,
|
||||
use_keypoints=False,
|
||||
cls91to80=True):
|
||||
"""Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
|
||||
"""
|
||||
Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
|
||||
|
||||
Args:
|
||||
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
|
||||
|
|
@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
|
|||
'helipad': 17}
|
||||
|
||||
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
|
||||
"""Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
|
||||
orig_label_path = orig_label_dir / f'{image_name}.txt'
|
||||
save_path = save_dir / f'{image_name}.txt'
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
|
|||
"""
|
||||
|
||||
def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
|
||||
"""Initializes the YOLODataset with optional configurations for segments and keypoints."""
|
||||
self.use_segments = use_segments
|
||||
self.use_keypoints = use_keypoints
|
||||
self.data = data
|
||||
|
|
@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
|
|||
super().__init__(*args, **kwargs)
|
||||
|
||||
def cache_labels(self, path=Path('./labels.cache')):
|
||||
"""Cache dataset labels, check images and read shapes.
|
||||
"""
|
||||
Cache dataset labels, check images and read shapes.
|
||||
|
||||
Args:
|
||||
path (Path): Path where the cache file is saved (default: Path('./labels.cache')).
|
||||
Returns:
|
||||
|
|
@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
|
|||
self.transforms = self.build_transforms(hyp)
|
||||
|
||||
def update_labels_info(self, label):
|
||||
"""custom your label format here."""
|
||||
"""Custom your label format here."""
|
||||
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
|
||||
# we can make it also support classification and semantic segmentation by add or remove some dict keys there.
|
||||
bboxes = label.pop('bboxes')
|
||||
|
|
@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
|
|||
return {'img': sample, 'cls': j}
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the total number of samples in the dataset."""
|
||||
return len(self.samples)
|
||||
|
||||
def verify_images(self):
|
||||
|
|
@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):
|
|||
|
||||
# TODO: support semantic segmentation
|
||||
class SemanticDataset(BaseDataset):
|
||||
"""
|
||||
Semantic Segmentation Dataset.
|
||||
|
||||
This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
|
||||
from the BaseDataset class.
|
||||
|
||||
Note:
|
||||
This class is currently a placeholder and needs to be populated with methods and attributes for supporting
|
||||
semantic segmentation tasks.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize a SemanticDataset object."""
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements
|
|||
|
||||
@dataclass
|
||||
class SourceTypes:
|
||||
"""Class to represent various types of input sources for predictions."""
|
||||
webcam: bool = False
|
||||
screenshot: bool = False
|
||||
from_img: bool = False
|
||||
|
|
@ -29,7 +30,34 @@ class SourceTypes:
|
|||
|
||||
|
||||
class LoadStreams:
|
||||
"""Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP streams`."""
|
||||
"""
|
||||
Stream Loader for various types of video streams.
|
||||
|
||||
Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams.
|
||||
|
||||
Attributes:
|
||||
sources (str): The source input paths or URLs for the video streams.
|
||||
imgsz (int): The image size for processing, defaults to 640.
|
||||
vid_stride (int): Video frame-rate stride, defaults to 1.
|
||||
buffer (bool): Whether to buffer input streams, defaults to False.
|
||||
running (bool): Flag to indicate if the streaming thread is running.
|
||||
mode (str): Set to 'stream' indicating real-time capture.
|
||||
imgs (list): List of image frames for each stream.
|
||||
fps (list): List of FPS for each stream.
|
||||
frames (list): List of total frames for each stream.
|
||||
threads (list): List of threads for each stream.
|
||||
shape (list): List of shapes for each stream.
|
||||
caps (list): List of cv2.VideoCapture objects for each stream.
|
||||
bs (int): Batch size for processing.
|
||||
|
||||
Methods:
|
||||
__init__: Initialize the stream loader.
|
||||
update: Read stream frames in daemon thread.
|
||||
close: Close stream loader and release resources.
|
||||
__iter__: Returns an iterator object for the class.
|
||||
__next__: Returns source paths, transformed, and original images for processing.
|
||||
__len__: Return the length of the sources object.
|
||||
"""
|
||||
|
||||
def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
|
||||
"""Initialize instance variables and check for consistent input stream shapes."""
|
||||
|
|
@ -149,10 +177,33 @@ class LoadStreams:
|
|||
|
||||
|
||||
class LoadScreenshots:
|
||||
"""YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
|
||||
"""
|
||||
YOLOv8 screenshot dataloader.
|
||||
|
||||
This class manages the loading of screenshot images for processing with YOLOv8.
|
||||
Suitable for use with `yolo predict source=screen`.
|
||||
|
||||
Attributes:
|
||||
source (str): The source input indicating which screen to capture.
|
||||
imgsz (int): The image size for processing, defaults to 640.
|
||||
screen (int): The screen number to capture.
|
||||
left (int): The left coordinate for screen capture area.
|
||||
top (int): The top coordinate for screen capture area.
|
||||
width (int): The width of the screen capture area.
|
||||
height (int): The height of the screen capture area.
|
||||
mode (str): Set to 'stream' indicating real-time capture.
|
||||
frame (int): Counter for captured frames.
|
||||
sct (mss.mss): Screen capture object from `mss` library.
|
||||
bs (int): Batch size, set to 1.
|
||||
monitor (dict): Monitor configuration details.
|
||||
|
||||
Methods:
|
||||
__iter__: Returns an iterator object.
|
||||
__next__: Captures the next screenshot and returns it.
|
||||
"""
|
||||
|
||||
def __init__(self, source, imgsz=640):
|
||||
"""source = [screen_number left top width height] (pixels)."""
|
||||
"""Source = [screen_number left top width height] (pixels)."""
|
||||
check_requirements('mss')
|
||||
import mss # noqa
|
||||
|
||||
|
|
@ -192,7 +243,28 @@ class LoadScreenshots:
|
|||
|
||||
|
||||
class LoadImages:
|
||||
"""YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
|
||||
"""
|
||||
YOLOv8 image/video dataloader.
|
||||
|
||||
This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
|
||||
various formats, including single image files, video files, and lists of image and video paths.
|
||||
|
||||
Attributes:
|
||||
imgsz (int): Image size, defaults to 640.
|
||||
files (list): List of image and video file paths.
|
||||
nf (int): Total number of files (images and videos).
|
||||
video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
|
||||
mode (str): Current mode, 'image' or 'video'.
|
||||
vid_stride (int): Stride for video frame-rate, defaults to 1.
|
||||
bs (int): Batch size, set to 1 for this class.
|
||||
cap (cv2.VideoCapture): Video capture object for OpenCV.
|
||||
frame (int): Frame counter for video.
|
||||
frames (int): Total number of frames in the video.
|
||||
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
|
||||
|
||||
Methods:
|
||||
_new_video(path): Create a new cv2.VideoCapture object for a given video path.
|
||||
"""
|
||||
|
||||
def __init__(self, path, imgsz=640, vid_stride=1):
|
||||
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
|
||||
|
|
@ -285,6 +357,24 @@ class LoadImages:
|
|||
|
||||
|
||||
class LoadPilAndNumpy:
|
||||
"""
|
||||
Load images from PIL and Numpy arrays for batch processing.
|
||||
|
||||
This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
|
||||
It performs basic validation and format conversion to ensure that the images are in the required format for
|
||||
downstream processing.
|
||||
|
||||
Attributes:
|
||||
paths (list): List of image paths or autogenerated filenames.
|
||||
im0 (list): List of images stored as Numpy arrays.
|
||||
imgsz (int): Image size, defaults to 640.
|
||||
mode (str): Type of data being processed, defaults to 'image'.
|
||||
bs (int): Batch size, equivalent to the length of `im0`.
|
||||
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
|
||||
|
||||
Methods:
|
||||
_single_check(im): Validate and format a single image to a Numpy array.
|
||||
"""
|
||||
|
||||
def __init__(self, im0, imgsz=640):
|
||||
"""Initialize PIL and Numpy Dataloader."""
|
||||
|
|
@ -326,8 +416,24 @@ class LoadPilAndNumpy:
|
|||
|
||||
|
||||
class LoadTensor:
|
||||
"""
|
||||
Load images from torch.Tensor data.
|
||||
|
||||
This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
|
||||
|
||||
Attributes:
|
||||
im0 (torch.Tensor): The input tensor containing the image(s).
|
||||
bs (int): Batch size, inferred from the shape of `im0`.
|
||||
mode (str): Current mode, set to 'image'.
|
||||
paths (list): List of image paths or filenames.
|
||||
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
|
||||
|
||||
Methods:
|
||||
_single_check(im, stride): Validate and possibly modify the input tensor.
|
||||
"""
|
||||
|
||||
def __init__(self, im0) -> None:
|
||||
"""Initialize Tensor Dataloader."""
|
||||
self.im0 = self._single_check(im0)
|
||||
self.bs = self.im0.shape[0]
|
||||
self.mode = 'image'
|
||||
|
|
@ -370,9 +476,7 @@ class LoadTensor:
|
|||
|
||||
|
||||
def autocast_list(source):
|
||||
"""
|
||||
Merges a list of source of different types into a list of numpy arrays or PIL images
|
||||
"""
|
||||
"""Merges a list of source of different types into a list of numpy arrays or PIL images."""
|
||||
files = []
|
||||
for im in source:
|
||||
if isinstance(im, (str, Path)): # filename or uri
|
||||
|
|
|
|||
|
|
@ -547,9 +547,9 @@ class HUBDatasetStats:
|
|||
|
||||
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
|
||||
"""
|
||||
Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
|
||||
Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
|
||||
not be resized.
|
||||
Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
|
||||
Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
|
||||
resized.
|
||||
|
||||
Args:
|
||||
f (str): The path to the input image file.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue