Add docformatter to pre-commit (#5279)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
2023-10-09 02:25:22 +02:00 · 2023-10-09 02:25:22 +02:00 · 7517667a33
commit 7517667a33
parent c7aa83da31
90 changed files with 1396 additions and 497 deletions
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap

 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
+    """
+    Base class for image transformations.
+
+    This is a generic transformation class that can be extended for specific image processing needs.
+    The class is designed to be compatible with both classification and semantic segmentation tasks.
+
+    Methods:
+        __init__: Initializes the BaseTransform object.
+        apply_image: Applies image transformation to labels.
+        apply_instances: Applies transformations to object instances in labels.
+        apply_semantic: Applies semantic segmentation to an image.
+        __call__: Applies all label transformations to an image, instances, and semantic masks.
+    """

    def __init__(self) -> None:
+        """Initializes the BaseTransform object."""
        pass

    def apply_image(self, labels):
-        """Applies image transformation to labels."""
+        """Applies image transformations to labels."""
        pass

    def apply_instances(self, labels):
-        """Applies transformations to input 'labels' and returns object instances."""
+        """Applies transformations to object instances in labels."""
        pass

    def apply_semantic(self, labels):
@ -37,13 +51,14 @@ class BaseTransform:
        pass

    def __call__(self, labels):
-        """Applies label transformations to an image, instances and semantic masks."""
+        """Applies all label transformations to an image, instances, and semantic masks."""
        self.apply_image(labels)
        self.apply_instances(labels)
        self.apply_semantic(labels)


 class Compose:
+    """Class for composing multiple image transformations."""

    def __init__(self, transforms):
        """Initializes the Compose object with a list of transforms."""
@ -60,18 +75,23 @@ class Compose:
        self.transforms.append(transform)

    def tolist(self):
-        """Converts list of transforms to a standard Python list."""
+        """Converts the list of transforms to a standard Python list."""
        return self.transforms

    def __repr__(self):
-        """Return string representation of object."""
+        """Returns a string representation of the object."""
        return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"


 class BaseMixTransform:
-    """This implementation is from mmyolo."""
+    """
+    Class for base mix (MixUp/Mosaic) transformations.
+
+    This implementation is from mmyolo.
+    """

    def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+        """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
        self.dataset = dataset
        self.pre_transform = pre_transform
        self.p = p
@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):


 class MixUp(BaseMixTransform):
+    """Class for applying MixUp augmentation to the dataset."""

    def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+        """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
        super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)

    def get_indexes(self):
@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
        return random.randint(0, len(self.dataset) - 1)

    def _mix_transform(self, labels):
-        """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
+        """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
        r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
        labels2 = labels['mix_labels'][0]
        labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):


 class RandomPerspective:
+    """
+    Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
+    keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
+    option to apply these transformations conditionally with a specified probability.
+
+    Attributes:
+        degrees (float): Degree range for random rotations.
+        translate (float): Fraction of total width and height for random translation.
+        scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
+        shear (float): Shear intensity (angle in degrees).
+        perspective (float): Perspective distortion factor.
+        border (tuple): Tuple specifying mosaic border.
+        pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
+
+    Methods:
+        affine_transform(img, border): Applies a series of affine transformations to the image.
+        apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
+        apply_segments(segments, M): Transforms segments and generates new bounding boxes.
+        apply_keypoints(keypoints, M): Transforms keypoints.
+        __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
+        box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
+    """

    def __init__(self,
                 degrees=0.0,
@ -290,17 +334,31 @@ class RandomPerspective:
                 perspective=0.0,
                 border=(0, 0),
                 pre_transform=None):
+        """Initializes RandomPerspective object with transformation parameters."""
+
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.perspective = perspective
-        # Mosaic border
-        self.border = border
+        self.border = border  # mosaic border
        self.pre_transform = pre_transform

    def affine_transform(self, img, border):
-        """Center."""
+        """
+        Applies a sequence of affine transformations centered around the image center.
+
+        Args:
+            img (ndarray): Input image.
+            border (tuple): Border dimensions.
+
+        Returns:
+            img (ndarray): Transformed image.
+            M (ndarray): Transformation matrix.
+            s (float): Scale factor.
+        """
+
+        # Center
        C = np.eye(3, dtype=np.float32)

        C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
@ -462,8 +520,22 @@ class RandomPerspective:
        labels['resized_shape'] = img.shape[:2]
        return labels

-    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-        # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+        """
+        Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
+        before and after augmentation to decide whether a box is a candidate for further processing.
+
+        Args:
+            box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
+            box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
+            wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
+            ar_thr (float, optional): The aspect ratio threshold. Default is 100.
+            area_thr (float, optional): The area ratio threshold. Default is 0.1.
+            eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
+
+        Returns:
+            (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
+        """
        w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
        w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
        ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
@ -471,14 +543,32 @@ class RandomPerspective:


 class RandomHSV:
+    """
+    This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
+    image.
+
+    The adjustments are random but within limits set by hgain, sgain, and vgain.
+    """

    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
+        """
+        Initialize RandomHSV class with gains for each HSV channel.
+
+        Args:
+            hgain (float, optional): Maximum variation for hue. Default is 0.5.
+            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
+            vgain (float, optional): Maximum variation for value. Default is 0.5.
+        """
        self.hgain = hgain
        self.sgain = sgain
        self.vgain = vgain

    def __call__(self, labels):
-        """Applies image HSV augmentation"""
+        """
+        Applies random HSV augmentation to an image within the predefined limits.
+
+        The modified image replaces the original image in the input 'labels' dict.
+        """
        img = labels['img']
        if self.hgain or self.sgain or self.vgain:
            r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
@ -496,9 +586,22 @@ class RandomHSV:


 class RandomFlip:
-    """Applies random horizontal or vertical flip to an image with a given probability."""
+    """
+    Applies a random horizontal or vertical flip to an image with a given probability.
+
+    Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
+    """

    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
+        """
+        Initializes the RandomFlip class with probability and direction.
+
+        Args:
+            p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
+            direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
+                Default is 'horizontal'.
+            flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
+        """
        assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
        assert 0 <= p <= 1.0

@ -507,7 +610,16 @@ class RandomFlip:
        self.flip_idx = flip_idx

    def __call__(self, labels):
-        """Resize image and padding for detection, instance segmentation, pose."""
+        """
+        Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
+
+        Args:
+            labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
+                           'instances' is an object containing bounding boxes and optionally keypoints.
+
+        Returns:
+            (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
+        """
        img = labels['img']
        instances = labels.pop('instances')
        instances.convert_bbox(format='xywh')
@ -599,12 +711,38 @@ class LetterBox:


 class CopyPaste:
+    """
+    Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
+    responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
+    """

    def __init__(self, p=0.5) -> None:
+        """
+        Initializes the CopyPaste class with a given probability.
+
+        Args:
+            p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
+                                 Default is 0.5.
+        """
        self.p = p

    def __call__(self, labels):
-        """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
+        """
+        Applies the Copy-Paste augmentation to the given image and instances.
+
+        Args:
+            labels (dict): A dictionary containing:
+                           - 'img': The image to augment.
+                           - 'cls': Class labels associated with the instances.
+                           - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
+
+        Returns:
+            (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
+
+        Notes:
+            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
+            2. This method modifies the input dictionary 'labels' in place.
+        """
        im = labels['img']
        cls = labels['cls']
        h, w = im.shape[:2]
@ -639,9 +777,13 @@ class CopyPaste:


 class Albumentations:
-    """Albumentations transformations. Optional, uninstall package to disable.
-    Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
-    random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
+    """
+    Albumentations transformations.
+
+    Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
+    Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
+    compression.
+    """

    def __init__(self, p=1.0):
        """Initialize the transform object for YOLO bbox formatted params."""
@ -690,6 +832,19 @@ class Albumentations:

 # TODO: technically this is not an augmentation, maybe we should put this to another files
 class Format:
+    """
+    Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
+    standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
+
+    Attributes:
+        bbox_format (str): Format for bounding boxes. Default is 'xywh'.
+        normalize (bool): Whether to normalize bounding boxes. Default is True.
+        return_mask (bool): Return instance masks for segmentation. Default is False.
+        return_keypoint (bool): Return keypoints for pose estimation. Default is False.
+        mask_ratio (int): Downsample ratio for masks. Default is 4.
+        mask_overlap (bool): Whether to overlap masks. Default is True.
+        batch_idx (bool): Keep batch indexes. Default is True.
+    """

    def __init__(self,
                 bbox_format='xywh',
@ -699,6 +854,7 @@ class Format:
                 mask_ratio=4,
                 mask_overlap=True,
                 batch_idx=True):
+        """Initializes the Format class with given parameters."""
        self.bbox_format = bbox_format
        self.normalize = normalize
        self.return_mask = return_mask  # set False when training detection only
@ -746,7 +902,7 @@ class Format:
        return img

    def _format_segments(self, instances, cls, w, h):
-        """convert polygon points to bitmap."""
+        """Convert polygon points to bitmap."""
        segments = instances.segments
        if self.mask_overlap:
            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
@ -851,35 +1007,75 @@ def classify_albumentations(


 class ClassifyLetterBox:
-    """YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
+    """
+    YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+    T.Compose([LetterBox(size), ToTensor()]).
+
+    Attributes:
+        h (int): Target height of the image.
+        w (int): Target width of the image.
+        auto (bool): If True, automatically solves for short side using stride.
+        stride (int): The stride value, used when 'auto' is True.
+    """

    def __init__(self, size=(640, 640), auto=False, stride=32):
-        """Resizes image and crops it to center with max dimensions 'h' and 'w'."""
+        """
+        Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
+
+        Args:
+            size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
+            auto (bool): If True, automatically calculates the short side based on stride.
+            stride (int): The stride value, used when 'auto' is True.
+        """
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

-    def __call__(self, im):  # im = np.array HWC
+    def __call__(self, im):
+        """
+        Resizes the image and pads it with a letterbox method.
+
+        Args:
+            im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+        Returns:
+            (numpy.ndarray): The letterboxed and resized image as a numpy array.
+        """
        imh, imw = im.shape[:2]
-        r = min(self.h / imh, self.w / imw)  # ratio of new/old
-        h, w = round(imh * r), round(imw * r)  # resized image
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old dimensions
+        h, w = round(imh * r), round(imw * r)  # resized image dimensions
+
+        # Calculate padding dimensions
        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+
+        # Create padded image
        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
        return im_out


 class CenterCrop:
-    """YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
+    """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+    T.Compose([CenterCrop(size), ToTensor()]).
+    """

    def __init__(self, size=640):
        """Converts an image from numpy array to PyTorch tensor."""
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size

-    def __call__(self, im):  # im = np.array HWC
+    def __call__(self, im):
+        """
+        Center-crops the image to a square of its shorter side and resizes it to the target size.
+
+        Args:
+            im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+        Returns:
+            (numpy.ndarray): The center-cropped and resized image as a numpy array.
+        """
        imh, imw = im.shape[:2]
        m = min(imh, imw)  # min dimension
        top, left = (imh - m) // 2, (imw - m) // 2
@ -887,14 +1083,23 @@ class CenterCrop:


 class ToTensor:
-    """YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
+    """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""

    def __init__(self, half=False):
        """Initialize YOLOv8 ToTensor object with optional half-precision support."""
        super().__init__()
        self.half = half

-    def __call__(self, im):  # im = np.array HWC in BGR order
+    def __call__(self, im):
+        """
+        Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
+
+        Args:
+            im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
+
+        Returns:
+            (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
+        """
        im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
        im = torch.from_numpy(im)  # to torch
        im = im.half() if self.half else im.float()  # uint8 to fp16/32
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@ -62,6 +62,7 @@ class BaseDataset(Dataset):
                 classes=None,
                 fraction=1.0):
+        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
@ -256,7 +257,7 @@ class BaseDataset(Dataset):
        return len(self.labels)

    def update_labels_info(self, label):
-        """custom your label format here."""
+        """Customize your label format here."""
        return label

    def build_transforms(self, hyp=None):
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@ -20,7 +20,11 @@ from .utils import PIN_MEMORY


 class InfiniteDataLoader(dataloader.DataLoader):
-    """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+    """
+    Dataloader that reuses workers.
+
+    Uses same syntax as vanilla DataLoader.
+    """

    def __init__(self, *args, **kwargs):
        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
            yield next(self.iterator)

    def reset(self):
-        """Reset iterator.
+        """
+        Reset iterator.
+
        This is useful when we want to modify settings of dataset while training.
        """
        self.iterator = self._get_iterator()
@ -70,7 +76,7 @@ def seed_worker(worker_id):  # noqa


 def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
-    """Build YOLO Dataset"""
+    """Build YOLO Dataset."""
    return YOLODataset(
        img_path=img_path,
        imgsz=cfg.imgsz,
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@ -12,7 +12,8 @@ from ultralytics.utils import TQDM


 def coco91_to_coco80_class():
-    """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+    """
+    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
        (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                 use_segments=False,
                 use_keypoints=False,
                 cls91to80=True):
-    """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+    """
+    Converts COCO dataset annotations to a format suitable for training YOLOv5 models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
        'helipad': 17}

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
+        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f'{image_name}.txt'
        save_path = save_dir / f'{image_name}.txt'

--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
    """

    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
        self.use_segments = use_segments
        self.use_keypoints = use_keypoints
        self.data = data
@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
        super().__init__(*args, **kwargs)

    def cache_labels(self, path=Path('./labels.cache')):
-        """Cache dataset labels, check images and read shapes.
+        """
+        Cache dataset labels, check images and read shapes.
+
        Args:
            path (Path): path where to save the cache file (default: Path('./labels.cache')).
        Returns:
@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label):
-        """custom your label format here."""
+        """Customize your label format here."""
        # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
        # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
        bboxes = label.pop('bboxes')
@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
        return {'img': sample, 'cls': j}

    def __len__(self) -> int:
+        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self):
@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):

 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):
+    """
+    Semantic Segmentation Dataset.
+
+    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+    from the BaseDataset class.
+
+    Note:
+        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
+        semantic segmentation tasks.
+    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements

@dataclass
 class SourceTypes:
+    """Class to represent various types of input sources for predictions."""
    webcam: bool = False
    screenshot: bool = False
    from_img: bool = False
@ -29,7 +30,34 @@ class SourceTypes:


 class LoadStreams:
-    """Stream  Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP, TCP streams`."""
+    """
+    Stream Loader for various types of video streams.
+
+    Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams.
+
+    Attributes:
+        sources (str): The source input paths or URLs for the video streams.
+        imgsz (int): The image size for processing, defaults to 640.
+        vid_stride (int): Video frame-rate stride, defaults to 1.
+        buffer (bool): Whether to buffer input streams, defaults to False.
+        running (bool): Flag to indicate if the streaming thread is running.
+        mode (str): Set to 'stream' indicating real-time capture.
+        imgs (list): List of image frames for each stream.
+        fps (list): List of FPS for each stream.
+        frames (list): List of total frames for each stream.
+        threads (list): List of threads for each stream.
+        shape (list): List of shapes for each stream.
+        caps (list): List of cv2.VideoCapture objects for each stream.
+        bs (int): Batch size for processing.
+
+    Methods:
+        __init__: Initialize the stream loader.
+        update: Read stream frames in daemon thread.
+        close: Close stream loader and release resources.
+        __iter__: Returns an iterator object for the class.
+        __next__: Returns source paths, transformed, and original images for processing.
+        __len__: Return the length of the sources object.
+    """

    def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
        """Initialize instance variables and check for consistent input stream shapes."""
@ -149,10 +177,33 @@ class LoadStreams:


 class LoadScreenshots:
-    """YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
+    """
+    YOLOv8 screenshot dataloader.
+
+    This class manages the loading of screenshot images for processing with YOLOv8.
+    Suitable for use with `yolo predict source=screen`.
+
+    Attributes:
+        source (str): The source input indicating which screen to capture.
+        imgsz (int): The image size for processing, defaults to 640.
+        screen (int): The screen number to capture.
+        left (int): The left coordinate for screen capture area.
+        top (int): The top coordinate for screen capture area.
+        width (int): The width of the screen capture area.
+        height (int): The height of the screen capture area.
+        mode (str): Set to 'stream' indicating real-time capture.
+        frame (int): Counter for captured frames.
+        sct (mss.mss): Screen capture object from `mss` library.
+        bs (int): Batch size, set to 1.
+        monitor (dict): Monitor configuration details.
+
+    Methods:
+        __iter__: Returns an iterator object.
+        __next__: Captures the next screenshot and returns it.
+    """

    def __init__(self, source, imgsz=640):
-        """source = [screen_number left top width height] (pixels)."""
+        """Initialize screenshot loading, where source = [screen_number left top width height] (pixels)."""
        check_requirements('mss')
        import mss  # noqa

@ -192,7 +243,28 @@ class LoadScreenshots:


 class LoadImages:
-    """YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
+    """
+    YOLOv8 image/video dataloader.
+
+    This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
+    various formats, including single image files, video files, and lists of image and video paths.
+
+    Attributes:
+        imgsz (int): Image size, defaults to 640.
+        files (list): List of image and video file paths.
+        nf (int): Total number of files (images and videos).
+        video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
+        mode (str): Current mode, 'image' or 'video'.
+        vid_stride (int): Stride for video frame-rate, defaults to 1.
+        bs (int): Batch size, set to 1 for this class.
+        cap (cv2.VideoCapture): Video capture object for OpenCV.
+        frame (int): Frame counter for video.
+        frames (int): Total number of frames in the video.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _new_video(path): Create a new cv2.VideoCapture object for a given video path.
+    """

    def __init__(self, path, imgsz=640, vid_stride=1):
        """Initialize the Dataloader and raise FileNotFoundError if file not found."""
@ -285,6 +357,24 @@ class LoadImages:


 class LoadPilAndNumpy:
+    """
+    Load images from PIL and Numpy arrays for batch processing.
+
+    This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
+    It performs basic validation and format conversion to ensure that the images are in the required format for
+    downstream processing.
+
+    Attributes:
+        paths (list): List of image paths or autogenerated filenames.
+        im0 (list): List of images stored as Numpy arrays.
+        imgsz (int): Image size, defaults to 640.
+        mode (str): Type of data being processed, defaults to 'image'.
+        bs (int): Batch size, equivalent to the length of `im0`.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _single_check(im): Validate and format a single image to a Numpy array.
+    """

    def __init__(self, im0, imgsz=640):
        """Initialize PIL and Numpy Dataloader."""
@ -326,8 +416,24 @@ class LoadPilAndNumpy:


 class LoadTensor:
+    """
+    Load images from torch.Tensor data.
+
+    This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
+
+    Attributes:
+        im0 (torch.Tensor): The input tensor containing the image(s).
+        bs (int): Batch size, inferred from the shape of `im0`.
+        mode (str): Current mode, set to 'image'.
+        paths (list): List of image paths or filenames.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _single_check(im, stride): Validate and possibly modify the input tensor.
+    """

    def __init__(self, im0) -> None:
+        """Initialize Tensor Dataloader."""
        self.im0 = self._single_check(im0)
        self.bs = self.im0.shape[0]
        self.mode = 'image'
@ -370,9 +476,7 @@ class LoadTensor:


 def autocast_list(source):
-    """
-    Merges a list of source of different types into a list of numpy arrays or PIL images
-    """
+    """Merges a list of sources of different types into a list of numpy arrays or PIL images."""
    files = []
    for im in source:
        if isinstance(im, (str, Path)):  # filename or uri
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@ -547,9 +547,9 @@ class HUBDatasetStats:

 def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
    """
-    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
-    Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
-    not be resized.
+    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
+    Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
+    resized.

    Args:
        f (str): The path to the input image file.