ultralytics 8.2.62 add Explorer CLI model and data args (#14581)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Glenn Jocher 2024-07-22 03:29:44 +02:00 committed by GitHub
parent f4af1bccc6
commit 3b81b95e1c
8 changed files with 153 additions and 127 deletions

View file

@@ -192,7 +192,7 @@ def convert_plaintext_links_to_html(content):
     for paragraph in main_content.find_all(["p", "li"]):  # Focus on paragraphs and list items
         for text_node in paragraph.find_all(string=True, recursive=False):
             if text_node.parent.name not in {"a", "code"}:  # Ignore links and code blocks
-                new_text = re.sub(r"(https?://\S+)", r'<a href="\1">\1</a>', str(text_node))
+                new_text = re.sub(r"(https?://\S+?)(?=[,.!?;:]?\s|[,.!?;:]?$)", r'<a href="\1">\1</a>', str(text_node))
                 if "<a" in new_text:
                     new_soup = BeautifulSoup(new_text, "html.parser")
                     text_node.replace_with(new_soup)
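Note on the regex change: the lazy `\S+?` plus the punctuation lookahead keeps trailing sentence punctuation out of the generated anchor, which the old greedy pattern swallowed. A standalone check (sample sentence is made up):

```python
import re

text = "See https://docs.ultralytics.com, then continue."
old = re.sub(r"(https?://\S+)", r'<a href="\1">\1</a>', text)
new = re.sub(r"(https?://\S+?)(?=[,.!?;:]?\s|[,.!?;:]?$)", r'<a href="\1">\1</a>', text)
assert '<a href="https://docs.ultralytics.com,">' in old  # comma leaks into the href
assert '<a href="https://docs.ultralytics.com">' in new  # match now stops before it
```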

View file

@@ -34,6 +34,7 @@ muhammadrizwanmunawar123@gmail.com: RizwanMunawar
 not.committed.yet: null
 plashchynski@gmail.com: plashchynski
 priytosh.revolution@live.com: priytosh-tripathi
+rulosanti@gmail.com: null
 shuizhuyuanluo@126.com: null
 stormsson@users.noreply.github.com: stormsson
 waxmann.sergiu@me.com: sergiuwaxmann

View file

@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.2.61"
+__version__ = "8.2.62"

 import os
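A quick way to confirm the bump in an installed environment (assuming this exact release is installed):

```python
import ultralytics

assert ultralytics.__version__ == "8.2.62"
```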

View file

@@ -79,7 +79,7 @@ CLI_HELP_MSG = f"""
         yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

     5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
-        yolo explorer
+        yolo explorer data=data.yaml model=yolov8n.pt

     6. Streamlit real-time object detection on your webcam with Ultralytics YOLOv8
         yolo streamlit-predict
@@ -233,7 +233,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
         (SimpleNamespace): Namespace containing the merged configuration arguments.

     Examples:
-        >>> from ultralytics import get_cfg
+        >>> from ultralytics.cfg import get_cfg
         >>> config = get_cfg()  # Load default configuration
         >>> config = get_cfg('path/to/config.yaml', overrides={'epochs': 50, 'batch_size': 16})
@@ -546,16 +546,19 @@ def handle_yolo_settings(args: List[str]) -> None:
         LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")


-def handle_explorer():
+def handle_explorer(args: List[str]):
     """
-    Open the Ultralytics Explorer GUI for dataset exploration and analysis.
-
-    This function launches a graphical user interface that provides tools for interacting with and analyzing
-    datasets using the Ultralytics Explorer API. It checks for the required 'streamlit' package and informs
-    the user that the Explorer dashboard is loading.
+    This function launches a graphical user interface that provides tools for interacting with and analyzing datasets
+    using the Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the
+    Explorer dashboard is loading.
+
+    Args:
+        args (List[str]): A list of optional command line arguments.

     Examples:
-        >>> handle_explorer()
+        ```bash
+        yolo explorer data=data.yaml model=yolov8n.pt
+        ```

     Notes:
         - Requires 'streamlit' package version 1.29.0 or higher.
@@ -564,7 +567,12 @@ def handle_explorer():
     """
     checks.check_requirements("streamlit>=1.29.0")
     LOGGER.info("💡 Loading Explorer dashboard...")
-    subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
+    cmd = ["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"]
+    new = dict(parse_key_value_pair(a) for a in args)
+    check_dict_alignment(base={k: DEFAULT_CFG_DICT[k] for k in ["model", "data"]}, custom=new)
+    for k, v in new.items():
+        cmd += [k, v]
+    subprocess.run(cmd)


 def handle_streamlit_inference():
@@ -587,7 +595,7 @@ def handle_streamlit_inference():
     subprocess.run(["streamlit", "run", ROOT / "solutions/streamlit_inference.py", "--server.headless", "true"])


-def parse_key_value_pair(pair):
+def parse_key_value_pair(pair: str = "key=value"):
     """
     Parses a key-value pair string into separate key and value components.
@@ -650,7 +658,7 @@ def smart_value(v):
     Notes:
         - The function uses a case-insensitive comparison for boolean and None values.
-        - For other types, it attempts to use Python's eval() function, which can be unsafe if used with untrusted input.
+        - For other types, it attempts to use Python's eval() function, which can be unsafe if used on untrusted input.
         - If no conversion is possible, the original string is returned.
     """
     v_lower = v.lower()
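For context, a minimal standalone sketch of the coercion this docstring describes (an illustrative re-implementation, not the library's exact body):

```python
def smart_value(v: str):
    """Convert a CLI string to None/bool/number when possible, else return it unchanged."""
    v_lower = v.lower()
    if v_lower == "none":
        return None
    if v_lower in {"true", "false"}:
        return v_lower == "true"
    try:
        return eval(v)  # the step flagged above as unsafe on untrusted input
    except Exception:
        return v

assert smart_value("3") == 3 and smart_value("True") is True and smart_value("name") == "name"
```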
@@ -705,7 +713,7 @@ def entrypoint(debug=""):
         "hub": lambda: handle_yolo_hub(args[1:]),
         "login": lambda: handle_yolo_hub(args),
         "copy-cfg": copy_default_cfg,
-        "explorer": lambda: handle_explorer(),
+        "explorer": lambda: handle_explorer(args[1:]),
         "streamlit-predict": lambda: handle_streamlit_inference(),
     }
     full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}
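Putting the cfg changes together: the new explorer path parses `key=value` tokens, validates them against the model/data defaults, and appends them to the streamlit command. A standalone sketch of that flow (the helper is re-implemented here for illustration and assumed to split on the first `=`; the real parse_key_value_pair and check_dict_alignment live in ultralytics.cfg):

```python
def parse_key_value_pair(pair: str = "key=value"):
    # assumed behavior: split on the first '=' only
    k, v = pair.split("=", 1)
    return k.strip(), v.strip()

args = ["data=coco128.yaml", "model=yolov8n.pt"]  # e.g. from `yolo explorer data=... model=...`
new = dict(parse_key_value_pair(a) for a in args)
cmd = ["streamlit", "run", "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"]
for k, v in new.items():
    cmd += [k, v]  # forwarded as plain tokens that dash.py re-pairs via sys.argv
assert cmd[-4:] == ["data", "coco128.yaml", "model", "yolov8n.pt"]
```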

View file

@@ -9,20 +9,24 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="",
     """
     Automatically annotates images using a YOLO object detection model and a SAM segmentation model.

+    This function processes images in a specified directory, detects objects using a YOLO model, and then generates
+    segmentation masks using a SAM model. The resulting annotations are saved as text files.
+
     Args:
         data (str): Path to a folder containing images to be annotated.
-        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
-        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
-        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
-        output_dir (str | None | optional): Directory to save the annotated results.
-            Defaults to a 'labels' folder in the same directory as 'data'.
+        det_model (str): Path or name of the pre-trained YOLO detection model.
+        sam_model (str): Path or name of the pre-trained SAM segmentation model.
+        device (str): Device to run the models on (e.g., 'cpu', 'cuda', '0').
+        output_dir (str | None): Directory to save the annotated results. If None, a default directory is created.

-    Example:
-        ```python
-        from ultralytics.data.annotator import auto_annotate
-        auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
-        ```
+    Examples:
+        >>> from ultralytics.data.annotator import auto_annotate
+        >>> auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
+
+    Notes:
+        - The function creates a new directory for output if not specified.
+        - Annotation results are saved as text files with the same names as the input images.
+        - Each line in the output text file represents a detected object with its class ID and segmentation points.
     """
     det_model = YOLO(det_model)
     sam_model = SAM(sam_model)
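Given the output format the new Notes describe, a downstream consumer can re-pair each label line into a class ID plus (x, y) points. A hedged sketch (the sample line and exact coordinate normalization are assumed):

```python
line = "0 0.12 0.34 0.56 0.78 0.90 0.11"  # hypothetical auto_annotate output line
parts = line.split()
cls_id, coords = int(parts[0]), list(map(float, parts[1:]))
points = list(zip(coords[0::2], coords[1::2]))  # segmentation points as (x, y) pairs
assert cls_id == 0 and points[0] == (0.12, 0.34)
```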

View file

@@ -159,11 +159,11 @@ class Compose:
         tolist: Converts the list of transforms to a standard Python list.

     Examples:
-        >>> transforms = [RandomFlip(), RandomRotate(30), RandomCrop((224, 224))]
+        >>> transforms = [RandomFlip(), RandomPerspective(30)]
         >>> compose = Compose(transforms)
         >>> transformed_data = compose(data)
-        >>> compose.append(Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
-        >>> compose.insert(0, Resize((256, 256)))
+        >>> compose.append(CenterCrop((224, 224)))
+        >>> compose.insert(0, RandomFlip())
     """

     def __init__(self, transforms):
@@ -174,8 +174,8 @@ class Compose:
             transforms (List[Callable]): A list of callable transform objects to be applied sequentially.

         Examples:
-            >>> from ultralytics.data.augment import Compose, Resize, RandomFlip
-            >>> transforms = [Resize(640), RandomFlip()]
+            >>> from ultralytics.data.augment import Compose, RandomHSV, RandomFlip
+            >>> transforms = [RandomHSV(), RandomFlip()]
             >>> compose = Compose(transforms)
         """
         self.transforms = transforms if isinstance(transforms, list) else [transforms]
@@ -209,7 +209,7 @@ class Compose:
             transform (BaseTransform): The transformation to be added to the composition.

         Examples:
-            >>> compose = Compose([RandomFlip(), RandomRotate()])
+            >>> compose = Compose([RandomFlip(), RandomPerspective()])
             >>> compose.append(RandomHSV())
         """
         self.transforms.append(transform)
@@ -232,7 +232,7 @@ class Compose:
     def __getitem__(self, index: Union[list, int]) -> "Compose":
         """
-        Retrieve a specific transform or a set of transforms using indexing.
+        Retrieves a specific transform or a set of transforms using indexing.

         Args:
             index (int | List[int]): Index or list of indices of the transforms to retrieve.
@@ -244,10 +244,10 @@ class Compose:
             AssertionError: If the index is not of type int or list.

         Examples:
-            >>> transforms = [RandomFlip(), RandomRotate(10), RandomHSV(0.5, 0.5, 0.5)]
+            >>> transforms = [RandomFlip(), RandomPerspective(10), RandomHSV(0.5, 0.5, 0.5)]
             >>> compose = Compose(transforms)
-            >>> single_transform = compose[1]  # Returns a Compose object with only RandomRotate
-            >>> multiple_transforms = compose[0:2]  # Returns a Compose object with RandomFlip and RandomRotate
+            >>> single_transform = compose[1]  # Returns a Compose object with only RandomPerspective
+            >>> multiple_transforms = compose[0:2]  # Returns a Compose object with RandomFlip and RandomPerspective
         """
         assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
         index = [index] if isinstance(index, int) else index
@@ -288,7 +288,7 @@ class Compose:
             (List): A list containing all the transform objects in the Compose instance.

         Examples:
-            >>> transforms = [RandomFlip(), RandomRotate(10), RandomCrop()]
+            >>> transforms = [RandomFlip(), RandomPerspective(10), CenterCrop()]
             >>> compose = Compose(transforms)
             >>> transform_list = compose.tolist()
             >>> print(len(transform_list))
@@ -304,12 +304,12 @@ class Compose:
             (str): A string representation of the Compose object, including the list of transforms.

         Examples:
-            >>> transforms = [RandomFlip(), RandomAffine(degrees=10, translate=0.1, scale=0.1)]
+            >>> transforms = [RandomFlip(), RandomPerspective(degrees=10, translate=0.1, scale=0.1)]
             >>> compose = Compose(transforms)
             >>> print(compose)
             Compose([
                 RandomFlip(),
-                RandomAffine(degrees=10, translate=0.1, scale=0.1)
+                RandomPerspective(degrees=10, translate=0.1, scale=0.1)
             ])
         """
         return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
@@ -353,12 +353,12 @@ class BaseMixTransform:
         Args:
             dataset (Any): The dataset object containing images and labels for mixing.
-            pre_transform (Callable | None): Optional transform to apply before mixing. If None, no pre-transform is applied.
+            pre_transform (Callable | None): Optional transform to apply before mixing.
             p (float): Probability of applying the mix transformation. Should be in the range [0.0, 1.0].

         Examples:
             >>> dataset = YOLODataset("path/to/data")
-            >>> pre_transform = Compose([RandomFlip(), RandomRotate()])
+            >>> pre_transform = Compose([RandomFlip(), RandomPerspective()])
             >>> mix_transform = BaseMixTransform(dataset, pre_transform, p=0.5)
         """
         self.dataset = dataset
@@ -420,7 +420,7 @@ class BaseMixTransform:
             (Dict): The modified labels dictionary with augmented data after applying the mix transform.

         Examples:
-            >>> transform = MixUpTransform(dataset)
+            >>> transform = BaseMixTransform(dataset)
             >>> labels = {'image': img, 'bboxes': boxes, 'mix_labels': [{'image': img2, 'bboxes': boxes2}]}
             >>> augmented_labels = transform._mix_transform(labels)
         """
@@ -662,13 +662,12 @@ class Mosaic(BaseMixTransform):
         updates the corresponding labels for each image in the mosaic.

         Args:
-            labels (Dict): A dictionary containing image data and labels for the base image (index 0) and
-                three additional images (indices 1-3) in the 'mix_labels' key.
+            labels (Dict): A dictionary containing image data and labels for the base image (index 0) and three
+                additional images (indices 1-3) in the 'mix_labels' key.

         Returns:
-            (Dict): A dictionary containing the mosaic image and updated labels. The 'img' key contains the
-                mosaic image as a numpy array, and other keys contain the combined and adjusted labels for
-                all four images.
+            (Dict): A dictionary containing the mosaic image and updated labels. The 'img' key contains the mosaic
+                image as a numpy array, and other keys contain the combined and adjusted labels for all four images.

         Examples:
             >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
@@ -963,7 +962,7 @@ class RandomPerspective:
         shear (float): Maximum shear angle in degrees.
         perspective (float): Perspective distortion factor.
         border (Tuple[int, int]): Mosaic border size as (x, y).
-        pre_transform (callable): Optional transform to apply before the random perspective.
+        pre_transform (Callable | None): Optional transform to apply before the random perspective.

     Methods:
         affine_transform: Applies affine transformations to the input image.
@@ -988,9 +987,8 @@ class RandomPerspective:
         """
         Initializes RandomPerspective object with transformation parameters.

-        This class implements random perspective and affine transformations on images and corresponding
-        bounding boxes, segments, and keypoints. Transformations include rotation, translation, scaling,
-        and shearing.
+        This class implements random perspective and affine transformations on images and corresponding bounding boxes,
+        segments, and keypoints. Transformations include rotation, translation, scaling, and shearing.

         Args:
             degrees (float): Degree range for random rotations.
@@ -999,8 +997,8 @@ class RandomPerspective:
             shear (float): Shear intensity (angle in degrees).
             perspective (float): Perspective distortion factor.
             border (Tuple[int, int]): Tuple specifying mosaic border (top/bottom, left/right).
-            pre_transform (Callable | None): Function/transform to apply to the image before starting the
-                random transformation.
+            pre_transform (Callable | None): Function/transform to apply to the image before starting the random
+                transformation.

         Examples:
             >>> transform = RandomPerspective(degrees=10.0, translate=0.1, scale=0.5, shear=5.0)
@@ -1121,8 +1119,8 @@ class RandomPerspective:
         the transformed segments. It clips the transformed segments to fit within the new bounding boxes.

         Args:
-            segments (np.ndarray): Input segments with shape (N, M, 2), where N is the number of segments and M
-                is the number of points in each segment.
+            segments (np.ndarray): Input segments with shape (N, M, 2), where N is the number of segments and M is the
+                number of points in each segment.
             M (np.ndarray): Affine transformation matrix with shape (3, 3).

         Returns:
@@ -1203,10 +1201,10 @@ class RandomPerspective:
         Returns:
             (Dict): Transformed labels dictionary containing:
-                'img' (ndarray): The transformed image.
-                'cls' (ndarray): Updated class labels.
-                'instances' (Instances): Updated object instances.
-                'resized_shape' (Tuple[int, int]): New image shape after transformation.
+                - 'img' (np.ndarray): The transformed image.
+                - 'cls' (np.ndarray): Updated class labels.
+                - 'instances' (Instances): Updated object instances.
+                - 'resized_shape' (Tuple[int, int]): New image shape after transformation.

         Examples:
             >>> transform = RandomPerspective()
@@ -1271,9 +1269,9 @@ class RandomPerspective:
         been overly distorted or reduced by the augmentation process.

         Args:
-            box1 (numpy.ndarray): Original boxes before augmentation, shape (4, n) where n is the
+            box1 (numpy.ndarray): Original boxes before augmentation, shape (4, N) where N is the
                 number of boxes. Format is [x1, y1, x2, y2] in absolute coordinates.
-            box2 (numpy.ndarray): Augmented boxes after transformation, shape (4, n). Format is
+            box2 (numpy.ndarray): Augmented boxes after transformation, shape (4, N). Format is
                 [x1, y1, x2, y2] in absolute coordinates.
             wh_thr (float): Width and height threshold in pixels. Boxes smaller than this in either
                 dimension are rejected.
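The wh_thr rejection criterion described above amounts to a vectorized width/height test on the augmented boxes. A hedged sketch of just that test (the library's full filter also applies thresholds not shown in this hunk):

```python
import numpy as np

def wh_candidates(box2: np.ndarray, wh_thr: float = 2.0) -> np.ndarray:
    """Keep augmented boxes (4, N) in [x1, y1, x2, y2] whose width and height both exceed wh_thr pixels."""
    w, h = box2[2] - box2[0], box2[3] - box2[1]
    return (w > wh_thr) & (h > wh_thr)

boxes = np.array([[0, 0], [0, 0], [10, 1], [10, 1]], dtype=float)  # one 10x10 box, one 1x1 box
assert wh_candidates(boxes).tolist() == [True, False]
```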
@@ -1411,9 +1409,8 @@ class RandomFlip:
         It also updates any instances (bounding boxes, keypoints, etc.) accordingly.

         Args:
-            p (float): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
-            direction (str): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
-                Default is 'horizontal'.
+            p (float): The probability of applying the flip. Must be between 0 and 1.
+            direction (str): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
             flip_idx (List[int] | None): Index mapping for flipping keypoints, if any.

         Raises:
@@ -1538,15 +1535,15 @@ class LetterBox:
         """
         Resizes and pads an image for object detection, instance segmentation, or pose estimation tasks.

-        This method applies letterboxing to the input image, which involves resizing the image while maintaining its aspect
-        ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
+        This method applies letterboxing to the input image, which involves resizing the image while maintaining its
+        aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.

         Args:
-            labels (dict | None): A dictionary containing image data and associated labels. If None, an empty dict is used.
-            image (numpy.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.
+            labels (Dict | None): A dictionary containing image data and associated labels, or empty dict if None.
+            image (np.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.

         Returns:
-            (dict | tuple): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
+            (Dict | Tuple): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
                 updated labels, and additional metadata. If 'labels' is empty, returns a tuple containing the resized
                 and padded image, and a tuple of (ratio, (left_pad, top_pad)).
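The resize-then-pad contract in the LetterBox docstring reduces to simple geometry; a sketch of just the ratio/padding math (image ops omitted, symmetric padding assumed):

```python
def letterbox_geometry(shape, new_shape=(640, 640)):
    """Return (ratio, (left_pad, top_pad)) for fitting an (h, w) image into new_shape."""
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])  # keep aspect ratio
    new_w, new_h = round(shape[1] * r), round(shape[0] * r)
    dw, dh = new_shape[1] - new_w, new_shape[0] - new_h  # total padding
    return r, (dw / 2, dh / 2)

r, (left, top) = letterbox_geometry((480, 640))  # 480x640 image into a 640x640 canvas
assert r == 1.0 and (left, top) == (0.0, 80.0)
```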
@@ -1675,17 +1672,16 @@ class CopyPaste:
         Applies Copy-Paste augmentation to an image and its instances.

         Args:
-            labels (dict): A dictionary containing:
-                - 'img' (numpy.ndarray): The image to augment.
-                - 'cls' (numpy.ndarray): Class labels for the instances.
+            labels (Dict): A dictionary containing:
+                - 'img' (np.ndarray): The image to augment.
+                - 'cls' (np.ndarray): Class labels for the instances.
                 - 'instances' (ultralytics.engine.results.Instances): Object containing bounding boxes, segments, etc.

         Returns:
-            (dict): Dictionary with augmented image and updated instances under 'img', 'cls', and 'instances' keys.
+            (Dict): Dictionary with augmented image and updated instances under 'img', 'cls', and 'instances' keys.

         Examples:
-            >>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1, 2]),
-            ...           'instances': Instances(...)}
+            >>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1, 2]), 'instances': Instances(...)}
             >>> augmenter = CopyPaste(p=0.5)
             >>> augmented_labels = augmenter(labels)
         """
@@ -1874,8 +1870,12 @@ class Albumentations:
         Examples:
             >>> transform = Albumentations(p=0.5)
-            >>> augmented = transform({"img": np.random.rand(640, 640, 3), "cls": np.array([0, 1]),
-            ...                        "instances": Instances(bboxes=np.array([[0, 0, 1, 1], [0.5, 0.5, 0.8, 0.8]]))})
+            >>> labels = {
+            ...     "img": np.random.rand(640, 640, 3),
+            ...     "cls": np.array([0, 1]),
+            ...     "instances": Instances(bboxes=np.array([[0, 0, 1, 1], [0.5, 0.5, 0.8, 0.8]]))
+            ... }
+            >>> augmented = transform(labels)
             >>> assert augmented["img"].shape == (640, 640, 3)

         Notes:
@@ -1974,6 +1974,7 @@ class Format:
         mask_ratio (int): Downsample ratio for masks.
         mask_overlap (bool): Whether masks can overlap.
         batch_idx (bool): Whether to keep batch indexes.
+        bgr (float): The probability to return BGR images.

     Examples:
         >>> format = Format(bbox_format='xyxy', return_mask=True, return_keypoint=False)
@@ -1994,9 +1995,9 @@ class Format:
         """
         Formats image annotations for object detection, instance segmentation, and pose estimation tasks.

-        This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
-        It processes the input labels dictionary, converting annotations to the specified format and applying
-        normalization if required.
+        This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch
+        DataLoader. It processes the input labels dictionary, converting annotations to the specified format and
+        applying normalization if required.

         Args:
             labels (Dict): A dictionary containing image and annotation data with the following keys:
@@ -2069,7 +2070,7 @@ class Format:
         5. Converts the Numpy array to a PyTorch tensor.

         Args:
-            img (ndarray): Input image as a Numpy array with shape (H, W, C) or (H, W).
+            img (np.ndarray): Input image as a Numpy array with shape (H, W, C) or (H, W).

         Returns:
             (torch.Tensor): Formatted image as a PyTorch tensor with shape (C, H, W).
@@ -2130,11 +2131,11 @@ class RandomLoadText:
     to reflect the sampled texts and can optionally pad the text list to a fixed length.

     Attributes:
-        prompt_format (str): Format string for text prompts. Default is '{}'.
-        neg_samples (Tuple[int, int]): Range for randomly sampling negative texts. Default is (80, 80).
-        max_samples (int): Maximum number of different text samples in one image. Default is 80.
-        padding (bool): Whether to pad texts to max_samples. Default is False.
-        padding_value (str): The text used for padding when padding is True. Default is "".
+        prompt_format (str): Format string for text prompts.
+        neg_samples (Tuple[int, int]): Range for randomly sampling negative texts.
+        max_samples (int): Maximum number of different text samples in one image.
+        padding (bool): Whether to pad texts to max_samples.
+        padding_value (str): The text used for padding when padding is True.

     Methods:
         __call__: Processes the input labels and returns updated classes and texts.
@@ -2268,15 +2269,15 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
     Args:
         dataset (Dataset): The dataset object containing image data and annotations.
         imgsz (int): The target image size for resizing.
-        hyp (dict): A dictionary of hyperparameters controlling various aspects of the transformations.
+        hyp (Dict): A dictionary of hyperparameters controlling various aspects of the transformations.
         stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.

     Returns:
         (Compose): A composition of image transformations to be applied to the dataset.

     Examples:
-        >>> from ultralytics.data.dataset import Dataset
-        >>> dataset = Dataset(img_path='path/to/images', imgsz=640)
+        >>> from ultralytics.data.dataset import YOLODataset
+        >>> dataset = YOLODataset(img_path='path/to/images', imgsz=640)
         >>> hyp = {'mosaic': 1.0, 'copy_paste': 0.5, 'degrees': 10.0, 'translate': 0.2, 'scale': 0.9}
         >>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp)
         >>> augmented_data = transforms(dataset[0])
@@ -2332,15 +2333,12 @@ def classify_transforms(
         center cropping, conversion to tensor, and normalization.

     Args:
-        size (int | tuple): The target size for the transformed image. If an int, it defines the
-            shortest edge. If a tuple, it defines (height, width).
-        mean (tuple): Mean values for each RGB channel used in normalization. Defaults to
-            DEFAULT_MEAN.
-        std (tuple): Standard deviation values for each RGB channel used in normalization.
-            Defaults to DEFAULT_STD.
-        interpolation (int): Interpolation method for resizing. Defaults to Image.BILINEAR.
-        crop_fraction (float): Fraction of the image to be cropped. Defaults to
-            DEFAULT_CROP_FRACTION.
+        size (int | tuple): The target size for the transformed image. If an int, it defines the shortest edge. If a
+            tuple, it defines (height, width).
+        mean (tuple): Mean values for each RGB channel used in normalization.
+        std (tuple): Standard deviation values for each RGB channel used in normalization.
+        interpolation (int): Interpolation method for resizing.
+        crop_fraction (float): Fraction of the image to be cropped.

     Returns:
         (torchvision.transforms.Compose): A composition of torchvision transforms.
@@ -2482,7 +2480,7 @@ def classify_augmentations(
 # NOTE: keep this class for backward compatibility
 class ClassifyLetterBox:
     """
-    YOLOv8 LetterBox class for image preprocessing in classification tasks.
+    A class for resizing and padding images for classification tasks.

     This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
     It resizes and pads images to a specified size while maintaining the original aspect ratio.
@@ -2512,8 +2510,8 @@ class ClassifyLetterBox:
         pads images to a specified size while maintaining the original aspect ratio.

         Args:
-            size (Union[int, Tuple[int, int]]): Target size for the letterboxed image. If int, a square image of
-                (size, size) is created. If tuple, it should be (height, width).
+            size (int | Tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
+                (size, size) is created. If a tuple, it should be (height, width).
             auto (bool): If True, automatically calculates the short side based on stride. Default is False.
             stride (int): The stride value, used when 'auto' is True. Default is 32.

View file

@@ -1,5 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

+import sys
 import time
 from threading import Thread
@@ -17,7 +18,8 @@ def _get_explorer():
     """Initializes and returns an instance of the Explorer class."""
     exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
     thread = Thread(
-        target=exp.create_embeddings_table, kwargs={"force": st.session_state.get("force_recreate_embeddings")}
+        target=exp.create_embeddings_table,
+        kwargs={"force": st.session_state.get("force_recreate_embeddings"), "split": st.session_state.get("split")},
     )
     thread.start()
     progress_bar = st.progress(0, text="Creating embeddings table...")
@@ -29,33 +31,45 @@ def _get_explorer():
     progress_bar.empty()


-def init_explorer_form():
+def init_explorer_form(data=None, model=None):
     """Initializes an Explorer instance and creates embeddings table with progress tracking."""
-    datasets = ROOT / "cfg" / "datasets"
-    ds = [d.name for d in datasets.glob("*.yaml")]
-    models = [
-        "yolov8n.pt",
-        "yolov8s.pt",
-        "yolov8m.pt",
-        "yolov8l.pt",
-        "yolov8x.pt",
-        "yolov8n-seg.pt",
-        "yolov8s-seg.pt",
-        "yolov8m-seg.pt",
-        "yolov8l-seg.pt",
-        "yolov8x-seg.pt",
-        "yolov8n-pose.pt",
-        "yolov8s-pose.pt",
-        "yolov8m-pose.pt",
-        "yolov8l-pose.pt",
-        "yolov8x-pose.pt",
-    ]
+    if data is None:
+        datasets = ROOT / "cfg" / "datasets"
+        ds = [d.name for d in datasets.glob("*.yaml")]
+    else:
+        ds = [data]
+
+    if model is None:
+        models = [
+            "yolov8n.pt",
+            "yolov8s.pt",
+            "yolov8m.pt",
+            "yolov8l.pt",
+            "yolov8x.pt",
+            "yolov8n-seg.pt",
+            "yolov8s-seg.pt",
+            "yolov8m-seg.pt",
+            "yolov8l-seg.pt",
+            "yolov8x-seg.pt",
+            "yolov8n-pose.pt",
+            "yolov8s-pose.pt",
+            "yolov8m-pose.pt",
+            "yolov8l-pose.pt",
+            "yolov8x-pose.pt",
+        ]
+    else:
+        models = [model]
+
+    splits = ["train", "val", "test"]
+
     with st.form(key="explorer_init_form"):
-        col1, col2 = st.columns(2)
+        col1, col2, col3 = st.columns(3)
         with col1:
-            st.selectbox("Select dataset", ds, key="dataset", index=ds.index("coco128.yaml"))
+            st.selectbox("Select dataset", ds, key="dataset")
         with col2:
             st.selectbox("Select model", models, key="model")
+        with col3:
+            st.selectbox("Select split", splits, key="split")
         st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")
         st.form_submit_button("Explore", on_click=_get_explorer)
@@ -182,13 +196,13 @@ def utralytics_explorer_docs_callback():
     st.link_button("Ultrlaytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")


-def layout():
+def layout(data=None, model=None):
     """Resets explorer session variables and provides documentation with a link to API docs."""
     st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
     st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)

     if st.session_state.get("explorer") is None:
-        init_explorer_form()
+        init_explorer_form(data, model)
         return

     st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
@@ -264,4 +278,5 @@ def layout():

 if __name__ == "__main__":
-    layout()
+    kwargs = dict(zip(sys.argv[1::2], sys.argv[2::2]))
+    layout(**kwargs)
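The new __main__ block re-pairs whatever trailing tokens the streamlit invocation hands the script. Illustrating the zip trick with a hypothetical argv:

```python
argv = ["dash.py", "data", "coco128.yaml", "model", "yolov8n.pt"]  # hypothetical sys.argv
kwargs = dict(zip(argv[1::2], argv[2::2]))
assert kwargs == {"data": "coco128.yaml", "model": "yolov8n.pt"}
# layout(**kwargs) then pins the Explorer form to the requested dataset and model
```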

View file

@@ -583,7 +583,7 @@ class Model(nn.Module):
             **kwargs (Any): Additional keyword arguments for configuring the tracking process.

         Returns:
-            (List[ultralytics.engine.results.Results]): A list of tracking results, each encapsulated in a Results object.
+            (List[ultralytics.engine.results.Results]): A list of tracking results, each a Results object.

         Raises:
             AttributeError: If the predictor does not have registered trackers.
@@ -1028,8 +1028,8 @@ class Model(nn.Module):
         The default callbacks are defined in the 'callbacks.default_callbacks' dictionary, which contains predefined
         functions for various events in the model's lifecycle, such as on_train_start, on_epoch_end, etc.

-        This method is useful when you want to revert to the original set of callbacks after making custom modifications,
-        ensuring consistent behavior across different runs or experiments.
+        This method is useful when you want to revert to the original set of callbacks after making custom
+        modifications, ensuring consistent behavior across different runs or experiments.

         Examples:
             >>> model = YOLO('yolov8n.pt')
@@ -1122,7 +1122,7 @@ class Model(nn.Module):
             nested dictionaries. Each nested dictionary has keys 'model', 'trainer', 'validator', and
             'predictor', mapping to their respective class implementations.

-        Example:
+        Examples:
             >>> model = Model()
             >>> task_map = model.task_map
             >>> detect_class_map = task_map['detect']
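For orientation, the nested mapping the task_map docstring describes has this shape (class names here are illustrative placeholders, not pulled from this diff):

```python
task_map = {
    "detect": {
        "model": "DetectionModel",  # placeholder name
        "trainer": "DetectionTrainer",  # placeholder name
        "validator": "DetectionValidator",
        "predictor": "DetectionPredictor",
    },
}
detect_class_map = task_map["detect"]
assert set(detect_class_map) == {"model", "trainer", "validator", "predictor"}
```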