Update Results and CFG docstrings (#14139)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
Glenn Jocher 2024-07-01 21:18:55 +02:00 committed by GitHub
parent 08bc98812c
commit 0f2bee4cc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 299 additions and 103 deletions

View file

@ -187,10 +187,31 @@ def cfg2dict(cfg):
Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object.
Args:
cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary.
cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary. This may be a
path to a configuration file, a dictionary, or a SimpleNamespace object.
Returns:
cfg (dict): Configuration object in dictionary format.
(dict): Configuration object in dictionary format.
Example:
```python
from ultralytics.cfg import cfg2dict
from types import SimpleNamespace
# Example usage with a file path
config_dict = cfg2dict('config.yaml')
# Example usage with a SimpleNamespace
config_sn = SimpleNamespace(param1='value1', param2='value2')
config_dict = cfg2dict(config_sn)
# Example usage with a dictionary (returns the same dictionary)
config_dict = cfg2dict({'param1': 'value1', 'param2': 'value2'})
```
Notes:
- If `cfg` is a path or a string, it will be loaded as YAML and converted to a dictionary.
- If `cfg` is a SimpleNamespace object, it will be converted to a dictionary using `vars()`.
"""
if isinstance(cfg, (str, Path)):
cfg = yaml_load(cfg) # load dict
@ -201,14 +222,36 @@ def cfg2dict(cfg):
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
"""
Load and merge configuration data from a file or dictionary.
Load and merge configuration data from a file or dictionary, with optional overrides.
Args:
cfg (str | Path | Dict | SimpleNamespace): Configuration data.
overrides (str | Dict | optional): Overrides in the form of a file name or a dictionary. Default is None.
cfg (str | Path | dict | SimpleNamespace, optional): Configuration data source. Defaults to `DEFAULT_CFG_DICT`.
overrides (dict | None, optional): Dictionary containing key-value pairs to override the base configuration.
Defaults to None.
Returns:
(SimpleNamespace): Training arguments namespace.
(SimpleNamespace): Namespace containing the merged training arguments.
Notes:
- If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence.
- Special handling ensures alignment and correctness of the configuration, such as converting numeric `project`
and `name` to strings and validating the configuration keys and values.
Example:
```python
from ultralytics.cfg import get_cfg
# Load default configuration
config = get_cfg()
# Load from a custom file with overrides
config = get_cfg('path/to/config.yaml', overrides={'epochs': 50, 'batch': 16})
```
Resulting configuration values, with overrides applied:
```python
{'epochs': 50, 'batch': 16, ...}
```
"""
cfg = cfg2dict(cfg)
@ -236,7 +279,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
def check_cfg(cfg, hard=True):
"""Check Ultralytics configuration argument types and values."""
"""Validate Ultralytics configuration argument types and values, converting them if necessary."""
for k, v in cfg.items():
if v is not None: # None values may be from optional args
if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
@ -272,7 +315,7 @@ def check_cfg(cfg, hard=True):
def get_save_dir(args, name=None):
"""Return save_dir as created from train/val/predict arguments."""
"""Returns the directory path for saving outputs, derived from arguments or default settings."""
if getattr(args, "save_dir", None):
save_dir = args.save_dir
@ -287,7 +330,7 @@ def get_save_dir(args, name=None):
def _handle_deprecation(custom):
"""Hardcoded function to handle deprecated config keys."""
"""Handles deprecated configuration keys by mapping them to current equivalents with deprecation warnings."""
for key in custom.copy().keys():
if key == "boxes":
@ -308,13 +351,35 @@ def _handle_deprecation(custom):
def check_dict_alignment(base: Dict, custom: Dict, e=None):
"""
This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
Check for key alignment between custom and base configuration dictionaries, catering for deprecated keys and
providing informative error messages for mismatched keys.
Args:
custom (dict): a dictionary of custom configuration options
base (dict): a dictionary of base configuration options
e (Error, optional): An optional error that is passed by the calling function.
base (dict): The base configuration dictionary containing valid keys.
custom (dict): The custom configuration dictionary to be checked for alignment.
e (Exception, optional): An optional error instance passed by the calling function. Default is None.
Raises:
SystemExit: Terminates the program execution if mismatched keys are found.
Notes:
- The function provides suggestions for mismatched keys based on their similarity to valid keys in the
base configuration.
- Deprecated keys in the custom configuration are automatically handled and replaced with their updated
equivalents.
- A detailed error message is printed for each mismatched key, helping users to quickly identify and correct
their custom configurations.
Example:
```python
base_cfg = {'epochs': 50, 'lr0': 0.01, 'batch_size': 16}
custom_cfg = {'epoch': 100, 'lr': 0.02, 'batch_size': 32}
try:
check_dict_alignment(base_cfg, custom_cfg)
except SystemExit:
pass  # Handle the error or correct the configuration
```
"""
custom = _handle_deprecation(custom)
base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
@ -341,6 +406,22 @@ def merge_equals_args(args: List[str]) -> List[str]:
Returns:
(List[str]): A list of strings where the arguments around isolated '=' are merged.
Example:
The function modifies the argument list as follows:
```python
args = ["arg1", "=", "value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
args = ["arg1=", "value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
args = ["arg1", "=value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
```
"""
new_args = []
for i, arg in enumerate(args):
@ -361,15 +442,18 @@ def handle_yolo_hub(args: List[str]) -> None:
"""
Handle Ultralytics HUB command-line interface (CLI) commands.
This function processes Ultralytics HUB CLI commands such as login and logout.
It should be called when executing a script with arguments related to HUB authentication.
This function processes Ultralytics HUB CLI commands such as login and logout. It should be called when executing
a script with arguments related to HUB authentication.
Args:
args (List[str]): A list of command line arguments
args (List[str]): A list of command line arguments.
Returns:
None
Example:
```bash
python my_script.py hub login your_api_key
yolo hub login YOUR_API_KEY
```
"""
from ultralytics import hub
@ -387,16 +471,23 @@ def handle_yolo_settings(args: List[str]) -> None:
"""
Handle YOLO settings command-line interface (CLI) commands.
This function processes YOLO settings CLI commands such as reset.
It should be called when executing a script with arguments related to YOLO settings management.
This function processes YOLO settings CLI commands such as reset. It should be called when executing a script with
arguments related to YOLO settings management.
Args:
args (List[str]): A list of command line arguments for YOLO settings management.
Returns:
None
Example:
```bash
python my_script.py yolo settings reset
yolo settings reset
```
Notes:
For more information on handling YOLO settings, visit:
https://docs.ultralytics.com/quickstart/#ultralytics-settings
"""
url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings" # help URL
try:
@ -417,7 +508,7 @@ def handle_yolo_settings(args: List[str]) -> None:
def handle_explorer():
"""Open the Ultralytics Explorer GUI."""
"""Open the Ultralytics Explorer GUI for dataset exploration and analysis."""
checks.check_requirements("streamlit")
LOGGER.info("💡 Loading Explorer dashboard...")
subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
@ -432,7 +523,7 @@ def parse_key_value_pair(pair):
def smart_value(v):
"""Convert a string to an underlying type such as int, float, bool, etc."""
"""Convert a string to its appropriate type (int, float, bool, None, etc.)."""
v_lower = v.lower()
if v_lower == "none":
return None
@ -448,18 +539,33 @@ def smart_value(v):
def entrypoint(debug=""):
"""
This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
to the package.
Ultralytics entrypoint function for parsing and executing command-line arguments.
This function allows for:
- passing mandatory YOLO args as a list of strings
- specifying the task to be performed, either 'detect', 'segment' or 'classify'
- specifying the mode, either 'train', 'val', 'test', or 'predict'
- running special modes like 'checks'
- passing overrides to the package's configuration
This function serves as the main entry point for the Ultralytics CLI, parsing command-line arguments and
executing the corresponding tasks such as training, validation, prediction, exporting models, and more.
It uses the package's default cfg and initializes it using the passed overrides.
Then it calls the CLI function with the composed cfg
Args:
debug (str, optional): Space-separated string of command-line arguments for debugging purposes. Default is "".
Returns:
(None): This function does not return any value.
Notes:
- For a list of all available commands and their arguments, see the provided help messages and the Ultralytics
documentation at https://docs.ultralytics.com.
- If no arguments are passed, the function will display the usage help message.
Example:
```python
# Train a detection model for 10 epochs with an initial learning_rate of 0.01
entrypoint("train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01")
# Predict a YouTube video using a pretrained segmentation model at image size 320
entrypoint("predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320")
# Validate a pretrained detection model at batch-size 1 and image size 640
entrypoint("val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640")
```
"""
args = (debug.split(" ") if debug else ARGV)[1:]
if not args: # no arguments passed
@ -596,7 +702,7 @@ def entrypoint(debug=""):
# Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg():
"""Copy and create a new default configuration file with '_copy' appended to its name."""
"""Copy the default configuration file to a new file with '_copy' appended to its name, providing a usage example."""
new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
shutil.copy2(DEFAULT_CFG_PATH, new_file)
LOGGER.info(

View file

@ -23,11 +23,24 @@ class BaseTensor(SimpleClass):
def __init__(self, data, orig_shape) -> None:
"""
Initialize BaseTensor with data and original shape.
Initialize BaseTensor with prediction data and the original shape of the image.
Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
orig_shape (tuple): Original shape of image.
data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
orig_shape (tuple): Original shape of the image, typically in the format (height, width).
Returns:
(None)
Example:
```python
import torch
from ultralytics.engine.results import BaseTensor
data = torch.tensor([[1, 2, 3], [4, 5, 6]])
orig_shape = (720, 1280)
base_tensor = BaseTensor(data, orig_shape)
```
"""
assert isinstance(data, (torch.Tensor, np.ndarray))
self.data = data
@ -35,19 +48,19 @@ class BaseTensor(SimpleClass):
@property
def shape(self):
"""Return the shape of the data tensor."""
"""Returns the shape of the underlying data tensor."""
return self.data.shape
def cpu(self):
"""Return a copy of the tensor on CPU memory."""
"""Return a copy of the tensor stored in CPU memory."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self):
"""Return a copy of the tensor as a numpy array."""
"""Returns a copy of the tensor as a numpy array for efficient numerical operations."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
def cuda(self):
"""Return a copy of the tensor on GPU memory."""
"""Returns a new instance of the same class with its data moved to GPU memory."""
return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
def to(self, *args, **kwargs):
@ -55,11 +68,11 @@ class BaseTensor(SimpleClass):
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
def __len__(self): # override len(results)
"""Return the length of the data tensor."""
"""Return the length of the underlying data tensor."""
return len(self.data)
def __getitem__(self, idx):
"""Return a BaseTensor with the specified index of the data tensor."""
"""Return a new BaseTensor instance containing the specified indexed elements of the data tensor."""
return self.__class__(self.data[idx], self.orig_shape)
@ -98,7 +111,7 @@ class Results(SimpleClass):
self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None
) -> None:
"""
Initialize the Results class.
Initialize the Results class for storing and manipulating inference results.
Args:
orig_img (numpy.ndarray): The original image as a numpy array.
@ -109,6 +122,15 @@ class Results(SimpleClass):
probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection.
obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
speed (dict, optional): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
Returns:
None
Example:
```python
results = model("path/to/image.jpg")
```
"""
self.orig_img = orig_img
self.orig_shape = orig_img.shape[:2]
@ -124,18 +146,18 @@ class Results(SimpleClass):
self._keys = "boxes", "masks", "probs", "keypoints", "obb"
def __getitem__(self, idx):
"""Return a Results object for the specified index."""
"""Return a Results object for a specific index of inference results."""
return self._apply("__getitem__", idx)
def __len__(self):
"""Return the number of detections in the Results object."""
"""Return the number of detections, taken from the first non-None attribute (boxes, masks, probs, keypoints, obb)."""
for k in self._keys:
v = getattr(self, k)
if v is not None:
return len(v)
def update(self, boxes=None, masks=None, probs=None, obb=None):
"""Update the boxes, masks, and probs attributes of the Results object."""
"""Updates detection results attributes including boxes, masks, probs, and obb with new data."""
if boxes is not None:
self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
if masks is not None:
@ -156,7 +178,15 @@ class Results(SimpleClass):
**kwargs: Arbitrary keyword arguments to pass to the function.
Returns:
Results: A new Results object with attributes modified by the applied function.
(Results): A new Results object with attributes modified by the applied function.
Example:
```python
results = model("path/to/image.jpg")
for result in results:
result_cuda = result.cuda()
result_cpu = result.cpu()
```
"""
r = self.new()
for k in self._keys:
@ -166,23 +196,23 @@ class Results(SimpleClass):
return r
def cpu(self):
"""Return a copy of the Results object with all tensors on CPU memory."""
"""Returns a copy of the Results object with all its tensors moved to CPU memory."""
return self._apply("cpu")
def numpy(self):
"""Return a copy of the Results object with all tensors as numpy arrays."""
"""Returns a copy of the Results object with all tensors as numpy arrays."""
return self._apply("numpy")
def cuda(self):
"""Return a copy of the Results object with all tensors on GPU memory."""
"""Returns a copy of the Results object with all its tensors moved to GPU memory."""
return self._apply("cuda")
def to(self, *args, **kwargs):
"""Return a copy of the Results object with tensors on the specified device and dtype."""
"""Returns a copy of the Results object with all its tensors moved to the specified device and dtype."""
return self._apply("to", *args, **kwargs)
def new(self):
"""Return a new Results object with the same image, path, names and speed."""
"""Returns a new Results object with the same image, path, names, and speed attributes."""
return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed)
def plot(
@ -220,7 +250,7 @@ class Results(SimpleClass):
labels (bool): Whether to plot the label of bounding boxes.
boxes (bool): Whether to plot the bounding boxes.
masks (bool): Whether to plot the masks.
probs (bool): Whether to plot classification probability
probs (bool): Whether to plot classification probability.
show (bool): Whether to display the annotated image directly.
save (bool): Whether to save the annotated image to `filename`.
filename (str): Filename to save image to if save is True.
@ -304,18 +334,18 @@ class Results(SimpleClass):
return annotator.result()
def show(self, *args, **kwargs):
"""Show annotated results image."""
"""Show the image with annotated inference results."""
self.plot(show=True, *args, **kwargs)
def save(self, filename=None, *args, **kwargs):
"""Save annotated results image."""
"""Save annotated inference results image to file."""
if not filename:
filename = f"results_{Path(self.path).name}"
self.plot(save=True, filename=filename, *args, **kwargs)
return filename
def verbose(self):
"""Return log string for each task."""
"""Returns a log string for each task in the results, detailing detection and classification outcomes."""
log_string = ""
probs = self.probs
boxes = self.boxes
@ -331,11 +361,35 @@ class Results(SimpleClass):
def save_txt(self, txt_file, save_conf=False):
"""
Save predictions into txt file.
Save detection results to a text file.
Args:
txt_file (str): txt file path.
save_conf (bool): save confidence score or not.
txt_file (str): Path to the output text file.
save_conf (bool): Whether to include confidence scores in the output.
Returns:
(str): Path to the saved text file.
Example:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model("path/to/image.jpg")
for result in results:
result.save_txt("output.txt")
```
Notes:
- The file will contain one line per detection or classification with the following structure:
- For detections: `class confidence x_center y_center width height`
- For classifications: `confidence class_name`
- For masks and keypoints, the specific formats will vary accordingly.
- The function will create the output directory if it does not exist.
- If save_conf is False, the confidence scores will be excluded from the output.
- Existing contents of the file will not be overwritten; new results will be appended.
"""
is_obb = self.obb is not None
boxes = self.obb if is_obb else self.boxes
@ -367,11 +421,27 @@ class Results(SimpleClass):
def save_crop(self, save_dir, file_name=Path("im.jpg")):
"""
Save cropped predictions to `save_dir/cls/file_name.jpg`.
Save cropped detection images to `save_dir/cls/file_name.jpg`.
Args:
save_dir (str | pathlib.Path): Save path.
file_name (str | pathlib.Path): File name.
save_dir (str | pathlib.Path): Directory path where the cropped images should be saved.
file_name (str | pathlib.Path): Filename for the saved cropped image.
Notes:
This function does not support Classify or Oriented Bounding Box (OBB) tasks. It will warn and exit if
called for such tasks.
Example:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
results = model("path/to/image.jpg")
# Save cropped images to the specified directory
for result in results:
result.save_crop(save_dir="path/to/save/crops", file_name="crop")
```
"""
if self.probs is not None:
LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
@ -388,7 +458,7 @@ class Results(SimpleClass):
)
def summary(self, normalize=False, decimals=5):
"""Convert the results to a summarized format."""
"""Convert inference results to a summarized dictionary with optional normalization for box coordinates."""
# Create list of detection dictionaries
results = []
if self.probs is not None:
@ -432,7 +502,7 @@ class Results(SimpleClass):
return results
def tojson(self, normalize=False, decimals=5):
"""Convert the results to JSON format."""
"""Converts detection results to JSON format."""
import json
return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
@ -449,7 +519,7 @@ class Boxes(BaseTensor):
orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
is_track (bool): Indicates whether tracking IDs are included in the box data.
Properties:
Attributes:
xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
cls (torch.Tensor | numpy.ndarray): Class labels for each box.
@ -467,13 +537,16 @@ class Boxes(BaseTensor):
def __init__(self, boxes, orig_shape) -> None:
"""
Initialize the Boxes class.
Initialize the Boxes class with detection box data and the original image shape.
Args:
boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, with
shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
If present, the third last column contains track IDs.
orig_shape (tuple): Original image size, in the format (height, width).
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6)
or (num_boxes, 7). Columns should contain [x1, y1, x2, y2, confidence, class, (optional) track_id].
The track ID column is included if present.
orig_shape (tuple): The original image shape as (height, width). Used for normalization.
Returns:
(None)
"""
if boxes.ndim == 1:
boxes = boxes[None, :]
@ -485,34 +558,34 @@ class Boxes(BaseTensor):
@property
def xyxy(self):
"""Return the boxes in xyxy format."""
"""Returns bounding boxes in [x1, y1, x2, y2] format."""
return self.data[:, :4]
@property
def conf(self):
"""Return the confidence values of the boxes."""
"""Returns the confidence scores for each detection box."""
return self.data[:, -2]
@property
def cls(self):
"""Return the class values of the boxes."""
"""Class ID tensor representing category predictions for each bounding box."""
return self.data[:, -1]
@property
def id(self):
"""Return the track IDs of the boxes (if available)."""
"""Return the tracking IDs for each box if available."""
return self.data[:, -3] if self.is_track else None
@property
@lru_cache(maxsize=2) # maxsize 1 should suffice
def xywh(self):
"""Return the boxes in xywh format."""
"""Returns boxes in [x, y, width, height] format."""
return ops.xyxy2xywh(self.xyxy)
@property
@lru_cache(maxsize=2)
def xyxyn(self):
"""Return the boxes in xyxy format normalized by original image size."""
"""Normalize box coordinates to [x1, y1, x2, y2] relative to the original image size."""
xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
xyxy[..., [0, 2]] /= self.orig_shape[1]
xyxy[..., [1, 3]] /= self.orig_shape[0]
@ -521,7 +594,7 @@ class Boxes(BaseTensor):
@property
@lru_cache(maxsize=2)
def xywhn(self):
"""Return the boxes in xywh format normalized by original image size."""
"""Returns normalized bounding boxes in [x, y, width, height] format."""
xywh = ops.xyxy2xywh(self.xyxy)
xywh[..., [0, 2]] /= self.orig_shape[1]
xywh[..., [1, 3]] /= self.orig_shape[0]
@ -544,7 +617,7 @@ class Masks(BaseTensor):
"""
def __init__(self, masks, orig_shape) -> None:
"""Initialize the Masks class with the given masks tensor and original image shape."""
"""Initializes the Masks class with a masks tensor and original image shape."""
if masks.ndim == 2:
masks = masks[None, :]
super().__init__(masks, orig_shape)
@ -552,7 +625,7 @@ class Masks(BaseTensor):
@property
@lru_cache(maxsize=1)
def xyn(self):
"""Return normalized segments."""
"""Return normalized xy-coordinates of the segmentation masks."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data)
@ -561,7 +634,7 @@ class Masks(BaseTensor):
@property
@lru_cache(maxsize=1)
def xy(self):
"""Return segments in pixel coordinates."""
"""Returns the [x, y] pixel coordinates for each segment in the mask tensor."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data)
@ -572,7 +645,7 @@ class Keypoints(BaseTensor):
"""
A class for storing and manipulating detection keypoints.
Attributes:
Attributes:
xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.
@ -586,7 +659,7 @@ class Keypoints(BaseTensor):
@smart_inference_mode() # avoid keypoints < conf in-place error
def __init__(self, keypoints, orig_shape) -> None:
"""Initializes the Keypoints object with detection keypoints and original image size."""
"""Initializes the Keypoints object with detection keypoints and original image dimensions."""
if keypoints.ndim == 2:
keypoints = keypoints[None, :]
if keypoints.shape[2] == 3: # x, y, conf
@ -604,7 +677,7 @@ class Keypoints(BaseTensor):
@property
@lru_cache(maxsize=1)
def xyn(self):
"""Returns normalized x, y coordinates of keypoints."""
"""Returns normalized coordinates (x, y) of keypoints relative to the original image size."""
xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
xy[..., 0] /= self.orig_shape[1]
xy[..., 1] /= self.orig_shape[0]
@ -613,7 +686,7 @@ class Keypoints(BaseTensor):
@property
@lru_cache(maxsize=1)
def conf(self):
"""Returns confidence values of keypoints if available, else None."""
"""Returns confidence values for each keypoint if available, otherwise None."""
return self.data[..., 2] if self.has_visible else None
@ -621,7 +694,7 @@ class Probs(BaseTensor):
"""
A class for storing and manipulating classification predictions.
Attributes:
Attributes:
top1 (int): Index of the top 1 class.
top5 (list[int]): Indices of the top 5 classes.
top1conf (torch.Tensor): Confidence of the top 1 class.
@ -635,31 +708,31 @@ class Probs(BaseTensor):
"""
def __init__(self, probs, orig_shape=None) -> None:
"""Initialize the Probs class with classification probabilities and optional original shape of the image."""
"""Initialize Probs with classification probabilities and optional original image shape."""
super().__init__(probs, orig_shape)
@property
@lru_cache(maxsize=1)
def top1(self):
"""Return the index of top 1."""
"""Return the index of the class with the highest probability."""
return int(self.data.argmax())
@property
@lru_cache(maxsize=1)
def top5(self):
"""Return the indices of top 5."""
"""Return the indices of the top 5 class probabilities."""
return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy.
@property
@lru_cache(maxsize=1)
def top1conf(self):
"""Return the confidence of top 1."""
"""Retrieves the confidence score of the highest probability class."""
return self.data[self.top1]
@property
@lru_cache(maxsize=1)
def top5conf(self):
"""Return the confidences of top 5."""
"""Returns confidence scores for the top 5 classification predictions."""
return self.data[self.top5]
@ -673,7 +746,7 @@ class OBB(BaseTensor):
If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
orig_shape (tuple): Original image size, in the format (height, width).
Attributes:
Attributes:
xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
@ -691,7 +764,7 @@ class OBB(BaseTensor):
"""
def __init__(self, boxes, orig_shape) -> None:
"""Initialize the Boxes class."""
"""Initialize an OBB instance with oriented bounding box data and original image shape."""
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
@ -702,34 +775,34 @@ class OBB(BaseTensor):
@property
def xywhr(self):
"""Return the rotated boxes in xywhr format."""
"""Return boxes in [x_center, y_center, width, height, rotation] format."""
return self.data[:, :5]
@property
def conf(self):
"""Return the confidence values of the boxes."""
"""Gets the confidence values of Oriented Bounding Boxes (OBBs)."""
return self.data[:, -2]
@property
def cls(self):
"""Return the class values of the boxes."""
"""Returns the class values of the oriented bounding boxes."""
return self.data[:, -1]
@property
def id(self):
"""Return the track IDs of the boxes (if available)."""
"""Return the tracking IDs of the oriented bounding boxes (if available)."""
return self.data[:, -3] if self.is_track else None
@property
@lru_cache(maxsize=2)
def xyxyxyxy(self):
"""Return the boxes in xyxyxyxy format, (N, 4, 2)."""
"""Convert OBB format to 8-point (xyxyxyxy) coordinate format of shape (N, 4, 2) for rotated bounding boxes."""
return ops.xywhr2xyxyxyxy(self.xywhr)
@property
@lru_cache(maxsize=2)
def xyxyxyxyn(self):
"""Return the boxes in xyxyxyxy format, (N, 4, 2)."""
"""Converts rotated bounding boxes to normalized xyxyxyxy format of shape (N, 4, 2)."""
xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
xyxyxyxyn[..., 0] /= self.orig_shape[1]
xyxyxyxyn[..., 1] /= self.orig_shape[0]
@ -739,9 +812,28 @@ class OBB(BaseTensor):
@lru_cache(maxsize=2)
def xyxy(self):
"""
Return the horizontal boxes in xyxy format, (N, 4).
Convert the oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format (x1, y1, x2, y2).
Accepts both torch and numpy boxes.
Returns:
(torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (num_boxes, 4).
Example:
```python
import torch
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('path/to/image.jpg')
for result in results:
obb = result.obb
if obb is not None:
xyxy_boxes = obb.xyxy
# Do something with xyxy_boxes
```
Note:
This method is useful to perform operations that require axis-aligned bounding boxes, such as IoU
calculation with non-rotated boxes. The conversion approximates the OBB by the minimal enclosing rectangle.
"""
x = self.xyxyxyxy[..., 0]
y = self.xyxyxyxy[..., 1]

View file

@ -1172,8 +1172,6 @@ class ClassifyMetrics(SimpleClass):
top1 (float): The top-1 accuracy.
top5 (float): The top-5 accuracy.
speed (Dict[str, float]): A dictionary containing the time taken for each step in the pipeline.
Properties:
fitness (float): The fitness of the model, which is equal to top-5 accuracy.
results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness.
keys (List[str]): A list of keys for the results_dict.