Update Results and CFG docstrings (#14139)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
Glenn Jocher 2024-07-01 21:18:55 +02:00 committed by GitHub
parent 08bc98812c
commit 0f2bee4cc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 299 additions and 103 deletions

View file

@ -187,10 +187,31 @@ def cfg2dict(cfg):
Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object. Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object.
Args: Args:
cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary. cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary. This may be a
path to a configuration file, a dictionary, or a SimpleNamespace object.
Returns: Returns:
cfg (dict): Configuration object in dictionary format. (dict): Configuration object in dictionary format.
Example:
```python
from ultralytics.cfg import cfg2dict
from types import SimpleNamespace
# Example usage with a file path
config_dict = cfg2dict('config.yaml')
# Example usage with a SimpleNamespace
config_sn = SimpleNamespace(param1='value1', param2='value2')
config_dict = cfg2dict(config_sn)
# Example usage with a dictionary (returns the same dictionary)
config_dict = cfg2dict({'param1': 'value1', 'param2': 'value2'})
```
Notes:
- If `cfg` is a path or a string, it will be loaded as YAML and converted to a dictionary.
- If `cfg` is a SimpleNamespace object, it will be converted to a dictionary using `vars()`.
""" """
if isinstance(cfg, (str, Path)): if isinstance(cfg, (str, Path)):
cfg = yaml_load(cfg) # load dict cfg = yaml_load(cfg) # load dict
@ -201,14 +222,36 @@ def cfg2dict(cfg):
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None): def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
""" """
Load and merge configuration data from a file or dictionary. Load and merge configuration data from a file or dictionary, with optional overrides.
Args: Args:
cfg (str | Path | Dict | SimpleNamespace): Configuration data. cfg (str | Path | dict | SimpleNamespace, optional): Configuration data source. Defaults to `DEFAULT_CFG_DICT`.
overrides (str | Dict | optional): Overrides in the form of a file name or a dictionary. Default is None. overrides (dict | None, optional): Dictionary containing key-value pairs to override the base configuration.
Defaults to None.
Returns: Returns:
(SimpleNamespace): Training arguments namespace. (SimpleNamespace): Namespace containing the merged training arguments.
Notes:
- If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence.
- Special handling ensures alignment and correctness of the configuration, such as converting numeric `project`
and `name` to strings and validating the configuration keys and values.
Example:
```python
from ultralytics.cfg import get_cfg
# Load default configuration
config = get_cfg()
# Load from a custom file with overrides
config = get_cfg('path/to/config.yaml', overrides={'epochs': 50, 'batch_size': 16})
```
Configuration dictionary merged with overrides:
```python
{'epochs': 50, 'batch_size': 16, ...}
```
""" """
cfg = cfg2dict(cfg) cfg = cfg2dict(cfg)
@ -236,7 +279,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
def check_cfg(cfg, hard=True): def check_cfg(cfg, hard=True):
"""Check Ultralytics configuration argument types and values.""" """Validate Ultralytics configuration argument types and values, converting them if necessary."""
for k, v in cfg.items(): for k, v in cfg.items():
if v is not None: # None values may be from optional args if v is not None: # None values may be from optional args
if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)): if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
@ -272,7 +315,7 @@ def check_cfg(cfg, hard=True):
def get_save_dir(args, name=None): def get_save_dir(args, name=None):
"""Return save_dir as created from train/val/predict arguments.""" """Returns the directory path for saving outputs, derived from arguments or default settings."""
if getattr(args, "save_dir", None): if getattr(args, "save_dir", None):
save_dir = args.save_dir save_dir = args.save_dir
@ -287,7 +330,7 @@ def get_save_dir(args, name=None):
def _handle_deprecation(custom): def _handle_deprecation(custom):
"""Hardcoded function to handle deprecated config keys.""" """Handles deprecated configuration keys by mapping them to current equivalents with deprecation warnings."""
for key in custom.copy().keys(): for key in custom.copy().keys():
if key == "boxes": if key == "boxes":
@ -308,13 +351,35 @@ def _handle_deprecation(custom):
def check_dict_alignment(base: Dict, custom: Dict, e=None): def check_dict_alignment(base: Dict, custom: Dict, e=None):
""" """
This function checks for any mismatched keys between a custom configuration list and a base configuration list. If Check for key alignment between custom and base configuration dictionaries, catering for deprecated keys and
any mismatched keys are found, the function prints out similar keys from the base list and exits the program. providing informative error messages for mismatched keys.
Args: Args:
custom (dict): a dictionary of custom configuration options base (dict): The base configuration dictionary containing valid keys.
base (dict): a dictionary of base configuration options custom (dict): The custom configuration dictionary to be checked for alignment.
e (Error, optional): An optional error that is passed by the calling function. e (Exception, optional): An optional error instance passed by the calling function. Default is None.
Raises:
SystemExit: Terminates the program execution if mismatched keys are found.
Notes:
- The function provides suggestions for mismatched keys based on their similarity to valid keys in the
base configuration.
- Deprecated keys in the custom configuration are automatically handled and replaced with their updated
equivalents.
- A detailed error message is printed for each mismatched key, helping users to quickly identify and correct
their custom configurations.
Example:
```python
base_cfg = {'epochs': 50, 'lr0': 0.01, 'batch_size': 16}
custom_cfg = {'epoch': 100, 'lr': 0.02, 'batch_size': 32}
try:
check_dict_alignment(base_cfg, custom_cfg)
except SystemExit:
# Handle the error or correct the configuration
```
""" """
custom = _handle_deprecation(custom) custom = _handle_deprecation(custom)
base_keys, custom_keys = (set(x.keys()) for x in (base, custom)) base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
@ -341,6 +406,22 @@ def merge_equals_args(args: List[str]) -> List[str]:
Returns: Returns:
(List[str]): A list of strings where the arguments around isolated '=' are merged. (List[str]): A list of strings where the arguments around isolated '=' are merged.
Example:
The function modifies the argument list as follows:
```python
args = ["arg1", "=", "value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
args = ["arg1=", "value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
args = ["arg1", "=value"]
new_args = merge_equals_args(args)
print(new_args) # Output: ["arg1=value"]
```
""" """
new_args = [] new_args = []
for i, arg in enumerate(args): for i, arg in enumerate(args):
@ -361,15 +442,18 @@ def handle_yolo_hub(args: List[str]) -> None:
""" """
Handle Ultralytics HUB command-line interface (CLI) commands. Handle Ultralytics HUB command-line interface (CLI) commands.
This function processes Ultralytics HUB CLI commands such as login and logout. This function processes Ultralytics HUB CLI commands such as login and logout. It should be called when executing
It should be called when executing a script with arguments related to HUB authentication. a script with arguments related to HUB authentication.
Args: Args:
args (List[str]): A list of command line arguments args (List[str]): A list of command line arguments.
Returns:
None
Example: Example:
```bash ```bash
python my_script.py hub login your_api_key yolo hub login YOUR_API_KEY
``` ```
""" """
from ultralytics import hub from ultralytics import hub
@ -387,16 +471,23 @@ def handle_yolo_settings(args: List[str]) -> None:
""" """
Handle YOLO settings command-line interface (CLI) commands. Handle YOLO settings command-line interface (CLI) commands.
This function processes YOLO settings CLI commands such as reset. This function processes YOLO settings CLI commands such as reset. It should be called when executing a script with
It should be called when executing a script with arguments related to YOLO settings management. arguments related to YOLO settings management.
Args: Args:
args (List[str]): A list of command line arguments for YOLO settings management. args (List[str]): A list of command line arguments for YOLO settings management.
Returns:
None
Example: Example:
```bash ```bash
python my_script.py yolo settings reset yolo settings reset
``` ```
Notes:
For more information on handling YOLO settings, visit:
https://docs.ultralytics.com/quickstart/#ultralytics-settings
""" """
url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings" # help URL url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings" # help URL
try: try:
@ -417,7 +508,7 @@ def handle_yolo_settings(args: List[str]) -> None:
def handle_explorer(): def handle_explorer():
"""Open the Ultralytics Explorer GUI.""" """Open the Ultralytics Explorer GUI for dataset exploration and analysis."""
checks.check_requirements("streamlit") checks.check_requirements("streamlit")
LOGGER.info("💡 Loading Explorer dashboard...") LOGGER.info("💡 Loading Explorer dashboard...")
subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"]) subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
@ -432,7 +523,7 @@ def parse_key_value_pair(pair):
def smart_value(v): def smart_value(v):
"""Convert a string to an underlying type such as int, float, bool, etc.""" """Convert a string to its appropriate type (int, float, bool, None, etc.)."""
v_lower = v.lower() v_lower = v.lower()
if v_lower == "none": if v_lower == "none":
return None return None
@ -448,18 +539,33 @@ def smart_value(v):
def entrypoint(debug=""): def entrypoint(debug=""):
""" """
This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed Ultralytics entrypoint function for parsing and executing command-line arguments.
to the package.
This function allows for: This function serves as the main entry point for the Ultralytics CLI, parsing command-line arguments and
- passing mandatory YOLO args as a list of strings executing the corresponding tasks such as training, validation, prediction, exporting models, and more.
- specifying the task to be performed, either 'detect', 'segment' or 'classify'
- specifying the mode, either 'train', 'val', 'test', or 'predict'
- running special modes like 'checks'
- passing overrides to the package's configuration
It uses the package's default cfg and initializes it using the passed overrides. Args:
Then it calls the CLI function with the composed cfg debug (str, optional): Space-separated string of command-line arguments for debugging purposes. Default is "".
Returns:
(None): This function does not return any value.
Notes:
- For a list of all available commands and their arguments, see the provided help messages and the Ultralytics
documentation at https://docs.ultralytics.com.
- If no arguments are passed, the function will display the usage help message.
Example:
```python
# Train a detection model for 10 epochs with an initial learning_rate of 0.01
entrypoint("train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01")
# Predict a YouTube video using a pretrained segmentation model at image size 320
entrypoint("predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320")
# Validate a pretrained detection model at batch-size 1 and image size 640
entrypoint("val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640")
```
""" """
args = (debug.split(" ") if debug else ARGV)[1:] args = (debug.split(" ") if debug else ARGV)[1:]
if not args: # no arguments passed if not args: # no arguments passed
@ -596,7 +702,7 @@ def entrypoint(debug=""):
# Special modes -------------------------------------------------------------------------------------------------------- # Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg(): def copy_default_cfg():
"""Copy and create a new default configuration file with '_copy' appended to its name.""" """Copy and create a new default configuration file with '_copy' appended to its name, providing usage example."""
new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml") new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
shutil.copy2(DEFAULT_CFG_PATH, new_file) shutil.copy2(DEFAULT_CFG_PATH, new_file)
LOGGER.info( LOGGER.info(

View file

@ -23,11 +23,24 @@ class BaseTensor(SimpleClass):
def __init__(self, data, orig_shape) -> None: def __init__(self, data, orig_shape) -> None:
""" """
Initialize BaseTensor with data and original shape. Initialize BaseTensor with prediction data and the original shape of the image.
Args: Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints. data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
orig_shape (tuple): Original shape of image. orig_shape (tuple): Original shape of the image, typically in the format (height, width).
Returns:
(None)
Example:
```python
import torch
from ultralytics.engine.results import BaseTensor
data = torch.tensor([[1, 2, 3], [4, 5, 6]])
orig_shape = (720, 1280)
base_tensor = BaseTensor(data, orig_shape)
```
""" """
assert isinstance(data, (torch.Tensor, np.ndarray)) assert isinstance(data, (torch.Tensor, np.ndarray))
self.data = data self.data = data
@ -35,19 +48,19 @@ class BaseTensor(SimpleClass):
@property @property
def shape(self): def shape(self):
"""Return the shape of the data tensor.""" """Returns the shape of the underlying data tensor for easier manipulation and device handling."""
return self.data.shape return self.data.shape
def cpu(self): def cpu(self):
"""Return a copy of the tensor on CPU memory.""" """Return a copy of the tensor stored in CPU memory."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape) return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self): def numpy(self):
"""Return a copy of the tensor as a numpy array.""" """Returns a copy of the tensor as a numpy array for efficient numerical operations."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape) return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
def cuda(self): def cuda(self):
"""Return a copy of the tensor on GPU memory.""" """Moves the tensor to GPU memory, returning a new instance if necessary."""
return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape) return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
def to(self, *args, **kwargs): def to(self, *args, **kwargs):
@ -55,11 +68,11 @@ class BaseTensor(SimpleClass):
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape) return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
def __len__(self): # override len(results) def __len__(self): # override len(results)
"""Return the length of the data tensor.""" """Return the length of the underlying data tensor."""
return len(self.data) return len(self.data)
def __getitem__(self, idx): def __getitem__(self, idx):
"""Return a BaseTensor with the specified index of the data tensor.""" """Return a new BaseTensor instance containing the specified indexed elements of the data tensor."""
return self.__class__(self.data[idx], self.orig_shape) return self.__class__(self.data[idx], self.orig_shape)
@ -98,7 +111,7 @@ class Results(SimpleClass):
self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None
) -> None: ) -> None:
""" """
Initialize the Results class. Initialize the Results class for storing and manipulating inference results.
Args: Args:
orig_img (numpy.ndarray): The original image as a numpy array. orig_img (numpy.ndarray): The original image as a numpy array.
@ -109,6 +122,15 @@ class Results(SimpleClass):
probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task. probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection. keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection.
obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection. obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
speed (dict, optional): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
Returns:
None
Example:
```python
results = model("path/to/image.jpg")
```
""" """
self.orig_img = orig_img self.orig_img = orig_img
self.orig_shape = orig_img.shape[:2] self.orig_shape = orig_img.shape[:2]
@ -124,18 +146,18 @@ class Results(SimpleClass):
self._keys = "boxes", "masks", "probs", "keypoints", "obb" self._keys = "boxes", "masks", "probs", "keypoints", "obb"
def __getitem__(self, idx): def __getitem__(self, idx):
"""Return a Results object for the specified index.""" """Return a Results object for a specific index of inference results."""
return self._apply("__getitem__", idx) return self._apply("__getitem__", idx)
def __len__(self): def __len__(self):
"""Return the number of detections in the Results object.""" """Return the number of detections in the Results object from a non-empty attribute set (boxes, masks, etc.)."""
for k in self._keys: for k in self._keys:
v = getattr(self, k) v = getattr(self, k)
if v is not None: if v is not None:
return len(v) return len(v)
def update(self, boxes=None, masks=None, probs=None, obb=None): def update(self, boxes=None, masks=None, probs=None, obb=None):
"""Update the boxes, masks, and probs attributes of the Results object.""" """Updates detection results attributes including boxes, masks, probs, and obb with new data."""
if boxes is not None: if boxes is not None:
self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape) self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
if masks is not None: if masks is not None:
@ -156,7 +178,15 @@ class Results(SimpleClass):
**kwargs: Arbitrary keyword arguments to pass to the function. **kwargs: Arbitrary keyword arguments to pass to the function.
Returns: Returns:
Results: A new Results object with attributes modified by the applied function. (Results): A new Results object with attributes modified by the applied function.
Example:
```python
results = model("path/to/image.jpg")
for result in results:
result_cuda = result.cuda()
result_cpu = result.cpu()
```
""" """
r = self.new() r = self.new()
for k in self._keys: for k in self._keys:
@ -166,23 +196,23 @@ class Results(SimpleClass):
return r return r
def cpu(self): def cpu(self):
"""Return a copy of the Results object with all tensors on CPU memory.""" """Returns a copy of the Results object with all its tensors moved to CPU memory."""
return self._apply("cpu") return self._apply("cpu")
def numpy(self): def numpy(self):
"""Return a copy of the Results object with all tensors as numpy arrays.""" """Returns a copy of the Results object with all tensors as numpy arrays."""
return self._apply("numpy") return self._apply("numpy")
def cuda(self): def cuda(self):
"""Return a copy of the Results object with all tensors on GPU memory.""" """Moves all tensors in the Results object to GPU memory."""
return self._apply("cuda") return self._apply("cuda")
def to(self, *args, **kwargs): def to(self, *args, **kwargs):
"""Return a copy of the Results object with tensors on the specified device and dtype.""" """Moves all tensors in the Results object to the specified device and dtype."""
return self._apply("to", *args, **kwargs) return self._apply("to", *args, **kwargs)
def new(self): def new(self):
"""Return a new Results object with the same image, path, names and speed.""" """Returns a new Results object with the same image, path, names, and speed attributes."""
return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed) return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed)
def plot( def plot(
@ -220,7 +250,7 @@ class Results(SimpleClass):
labels (bool): Whether to plot the label of bounding boxes. labels (bool): Whether to plot the label of bounding boxes.
boxes (bool): Whether to plot the bounding boxes. boxes (bool): Whether to plot the bounding boxes.
masks (bool): Whether to plot the masks. masks (bool): Whether to plot the masks.
probs (bool): Whether to plot classification probability probs (bool): Whether to plot classification probability.
show (bool): Whether to display the annotated image directly. show (bool): Whether to display the annotated image directly.
save (bool): Whether to save the annotated image to `filename`. save (bool): Whether to save the annotated image to `filename`.
filename (str): Filename to save image to if save is True. filename (str): Filename to save image to if save is True.
@ -304,18 +334,18 @@ class Results(SimpleClass):
return annotator.result() return annotator.result()
def show(self, *args, **kwargs): def show(self, *args, **kwargs):
"""Show annotated results image.""" """Show the image with annotated inference results."""
self.plot(show=True, *args, **kwargs) self.plot(show=True, *args, **kwargs)
def save(self, filename=None, *args, **kwargs): def save(self, filename=None, *args, **kwargs):
"""Save annotated results image.""" """Save annotated inference results image to file."""
if not filename: if not filename:
filename = f"results_{Path(self.path).name}" filename = f"results_{Path(self.path).name}"
self.plot(save=True, filename=filename, *args, **kwargs) self.plot(save=True, filename=filename, *args, **kwargs)
return filename return filename
def verbose(self): def verbose(self):
"""Return log string for each task.""" """Returns a log string for each task in the results, detailing detection and classification outcomes."""
log_string = "" log_string = ""
probs = self.probs probs = self.probs
boxes = self.boxes boxes = self.boxes
@ -331,11 +361,35 @@ class Results(SimpleClass):
def save_txt(self, txt_file, save_conf=False): def save_txt(self, txt_file, save_conf=False):
""" """
Save predictions into txt file. Save detection results to a text file.
Args: Args:
txt_file (str): txt file path. txt_file (str): Path to the output text file.
save_conf (bool): save confidence score or not. save_conf (bool): Whether to include confidence scores in the output.
Returns:
(str): Path to the saved text file.
Example:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model("path/to/image.jpg")
for result in results:
result.save_txt("output.txt")
```
Notes:
- The file will contain one line per detection or classification with the following structure:
- For detections: `class confidence x_center y_center width height`
- For classifications: `confidence class_name`
- For masks and keypoints, the specific formats will vary accordingly.
- The function will create the output directory if it does not exist.
- If save_conf is False, the confidence scores will be excluded from the output.
- Existing contents of the file will not be overwritten; new results will be appended.
""" """
is_obb = self.obb is not None is_obb = self.obb is not None
boxes = self.obb if is_obb else self.boxes boxes = self.obb if is_obb else self.boxes
@ -367,11 +421,27 @@ class Results(SimpleClass):
def save_crop(self, save_dir, file_name=Path("im.jpg")): def save_crop(self, save_dir, file_name=Path("im.jpg")):
""" """
Save cropped predictions to `save_dir/cls/file_name.jpg`. Save cropped detection images to `save_dir/cls/file_name.jpg`.
Args: Args:
save_dir (str | pathlib.Path): Save path. save_dir (str | pathlib.Path): Directory path where the cropped images should be saved.
file_name (str | pathlib.Path): File name. file_name (str | pathlib.Path): Filename for the saved cropped image.
Notes:
This function does not support Classify or Oriented Bounding Box (OBB) tasks. It will warn and exit if
called for such tasks.
Example:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
results = model("path/to/image.jpg")
# Save cropped images to the specified directory
for result in results:
result.save_crop(save_dir="path/to/save/crops", file_name="crop")
```
""" """
if self.probs is not None: if self.probs is not None:
LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.") LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
@ -388,7 +458,7 @@ class Results(SimpleClass):
) )
def summary(self, normalize=False, decimals=5): def summary(self, normalize=False, decimals=5):
"""Convert the results to a summarized format.""" """Convert inference results to a summarized dictionary with optional normalization for box coordinates."""
# Create list of detection dictionaries # Create list of detection dictionaries
results = [] results = []
if self.probs is not None: if self.probs is not None:
@ -432,7 +502,7 @@ class Results(SimpleClass):
return results return results
def tojson(self, normalize=False, decimals=5): def tojson(self, normalize=False, decimals=5):
"""Convert the results to JSON format.""" """Converts detection results to JSON format."""
import json import json
return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2) return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
@ -449,7 +519,7 @@ class Boxes(BaseTensor):
orig_shape (tuple): The original image size as a tuple (height, width), used for normalization. orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
is_track (bool): Indicates whether tracking IDs are included in the box data. is_track (bool): Indicates whether tracking IDs are included in the box data.
Properties: Attributes:
xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format. xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
conf (torch.Tensor | numpy.ndarray): Confidence scores for each box. conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
cls (torch.Tensor | numpy.ndarray): Class labels for each box. cls (torch.Tensor | numpy.ndarray): Class labels for each box.
@ -467,13 +537,16 @@ class Boxes(BaseTensor):
def __init__(self, boxes, orig_shape) -> None: def __init__(self, boxes, orig_shape) -> None:
""" """
Initialize the Boxes class. Initialize the Boxes class with detection box data and the original image shape.
Args: Args:
boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, with boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6)
shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values. or (num_boxes, 7). Columns should contain [x1, y1, x2, y2, confidence, class, (optional) track_id].
If present, the third last column contains track IDs. The track ID column is included if present.
orig_shape (tuple): Original image size, in the format (height, width). orig_shape (tuple): The original image shape as (height, width). Used for normalization.
Returns:
(None)
""" """
if boxes.ndim == 1: if boxes.ndim == 1:
boxes = boxes[None, :] boxes = boxes[None, :]
@ -485,34 +558,34 @@ class Boxes(BaseTensor):
@property @property
def xyxy(self): def xyxy(self):
"""Return the boxes in xyxy format.""" """Returns bounding boxes in [x1, y1, x2, y2] format."""
return self.data[:, :4] return self.data[:, :4]
@property @property
def conf(self): def conf(self):
"""Return the confidence values of the boxes.""" """Returns the confidence scores for each detection box."""
return self.data[:, -2] return self.data[:, -2]
@property @property
def cls(self): def cls(self):
"""Return the class values of the boxes.""" """Class ID tensor representing category predictions for each bounding box."""
return self.data[:, -1] return self.data[:, -1]
@property @property
def id(self): def id(self):
"""Return the track IDs of the boxes (if available).""" """Return the tracking IDs for each box if available."""
return self.data[:, -3] if self.is_track else None return self.data[:, -3] if self.is_track else None
@property @property
@lru_cache(maxsize=2) # maxsize 1 should suffice @lru_cache(maxsize=2) # maxsize 1 should suffice
def xywh(self): def xywh(self):
"""Return the boxes in xywh format.""" """Returns boxes in [x, y, width, height] format."""
return ops.xyxy2xywh(self.xyxy) return ops.xyxy2xywh(self.xyxy)
@property @property
@lru_cache(maxsize=2) @lru_cache(maxsize=2)
def xyxyn(self): def xyxyn(self):
"""Return the boxes in xyxy format normalized by original image size.""" """Normalize box coordinates to [x1, y1, x2, y2] relative to the original image size."""
xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy) xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
xyxy[..., [0, 2]] /= self.orig_shape[1] xyxy[..., [0, 2]] /= self.orig_shape[1]
xyxy[..., [1, 3]] /= self.orig_shape[0] xyxy[..., [1, 3]] /= self.orig_shape[0]
@ -521,7 +594,7 @@ class Boxes(BaseTensor):
@property @property
@lru_cache(maxsize=2) @lru_cache(maxsize=2)
def xywhn(self): def xywhn(self):
"""Return the boxes in xywh format normalized by original image size.""" """Returns normalized bounding boxes in [x, y, width, height] format."""
xywh = ops.xyxy2xywh(self.xyxy) xywh = ops.xyxy2xywh(self.xyxy)
xywh[..., [0, 2]] /= self.orig_shape[1] xywh[..., [0, 2]] /= self.orig_shape[1]
xywh[..., [1, 3]] /= self.orig_shape[0] xywh[..., [1, 3]] /= self.orig_shape[0]
@ -544,7 +617,7 @@ class Masks(BaseTensor):
""" """
def __init__(self, masks, orig_shape) -> None: def __init__(self, masks, orig_shape) -> None:
"""Initialize the Masks class with the given masks tensor and original image shape.""" """Initializes the Masks class with a masks tensor and original image shape."""
if masks.ndim == 2: if masks.ndim == 2:
masks = masks[None, :] masks = masks[None, :]
super().__init__(masks, orig_shape) super().__init__(masks, orig_shape)
@ -552,7 +625,7 @@ class Masks(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def xyn(self): def xyn(self):
"""Return normalized segments.""" """Return normalized xy-coordinates of the segmentation masks."""
return [ return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True) ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data) for x in ops.masks2segments(self.data)
@ -561,7 +634,7 @@ class Masks(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def xy(self): def xy(self):
"""Return segments in pixel coordinates.""" """Returns the [x, y] normalized mask coordinates for each segment in the mask tensor."""
return [ return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False) ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data) for x in ops.masks2segments(self.data)
@ -572,7 +645,7 @@ class Keypoints(BaseTensor):
""" """
A class for storing and manipulating detection keypoints. A class for storing and manipulating detection keypoints.
Attributes:
xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection. xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1]. xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None. conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.
@ -586,7 +659,7 @@ class Keypoints(BaseTensor):
@smart_inference_mode() # avoid keypoints < conf in-place error @smart_inference_mode() # avoid keypoints < conf in-place error
def __init__(self, keypoints, orig_shape) -> None: def __init__(self, keypoints, orig_shape) -> None:
"""Initializes the Keypoints object with detection keypoints and original image size.""" """Initializes the Keypoints object with detection keypoints and original image dimensions."""
if keypoints.ndim == 2: if keypoints.ndim == 2:
keypoints = keypoints[None, :] keypoints = keypoints[None, :]
if keypoints.shape[2] == 3: # x, y, conf if keypoints.shape[2] == 3: # x, y, conf
@ -604,7 +677,7 @@ class Keypoints(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def xyn(self): def xyn(self):
"""Returns normalized x, y coordinates of keypoints.""" """Returns normalized coordinates (x, y) of keypoints relative to the original image size."""
xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy) xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
xy[..., 0] /= self.orig_shape[1] xy[..., 0] /= self.orig_shape[1]
xy[..., 1] /= self.orig_shape[0] xy[..., 1] /= self.orig_shape[0]
@ -613,7 +686,7 @@ class Keypoints(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def conf(self): def conf(self):
"""Returns confidence values of keypoints if available, else None.""" """Returns confidence values for each keypoint."""
return self.data[..., 2] if self.has_visible else None return self.data[..., 2] if self.has_visible else None
@ -621,7 +694,7 @@ class Probs(BaseTensor):
""" """
A class for storing and manipulating classification predictions. A class for storing and manipulating classification predictions.
Attributes:
top1 (int): Index of the top 1 class. top1 (int): Index of the top 1 class.
top5 (list[int]): Indices of the top 5 classes. top5 (list[int]): Indices of the top 5 classes.
top1conf (torch.Tensor): Confidence of the top 1 class. top1conf (torch.Tensor): Confidence of the top 1 class.
@ -635,31 +708,31 @@ class Probs(BaseTensor):
""" """
def __init__(self, probs, orig_shape=None) -> None:
    """Initialize Probs with classification probabilities and an optional original image shape."""
    # orig_shape is accepted for interface symmetry with the other result containers.
    super().__init__(probs, orig_shape)
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def top1(self): def top1(self):
"""Return the index of top 1.""" """Return the index of the class with the highest probability."""
return int(self.data.argmax()) return int(self.data.argmax())
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def top5(self): def top5(self):
"""Return the indices of top 5.""" """Return the indices of the top 5 class probabilities."""
return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy. return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy.
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def top1conf(self): def top1conf(self):
"""Return the confidence of top 1.""" """Retrieves the confidence score of the highest probability class."""
return self.data[self.top1] return self.data[self.top1]
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def top5conf(self): def top5conf(self):
"""Return the confidences of top 5.""" """Returns confidence scores for the top 5 classification predictions."""
return self.data[self.top5] return self.data[self.top5]
@ -673,7 +746,7 @@ class OBB(BaseTensor):
If present, the third last column contains track IDs, and the fifth column from the left contains rotation. If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
orig_shape (tuple): Original image size, in the format (height, width). orig_shape (tuple): Original image size, in the format (height, width).
Attributes:
xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format. xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes. conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
cls (torch.Tensor | numpy.ndarray): The class values of the boxes. cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
@ -691,7 +764,7 @@ class OBB(BaseTensor):
""" """
def __init__(self, boxes, orig_shape) -> None: def __init__(self, boxes, orig_shape) -> None:
"""Initialize the Boxes class.""" """Initialize an OBB instance with oriented bounding box data and original image shape."""
if boxes.ndim == 1: if boxes.ndim == 1:
boxes = boxes[None, :] boxes = boxes[None, :]
n = boxes.shape[-1] n = boxes.shape[-1]
@ -702,34 +775,34 @@ class OBB(BaseTensor):
@property
def xywhr(self):
    """Rotated boxes as [x_center, y_center, width, height, rotation] — the first five data columns."""
    return self.data[:, :5]
@property
def conf(self):
    """Per-box confidence scores (second-to-last column of the underlying data)."""
    return self.data[:, -2]
@property
def cls(self):
    """Per-box class values (last column of the underlying data)."""
    return self.data[:, -1]
@property
def id(self):
    """Track IDs of the boxes when tracking is active, otherwise None."""
    if not self.is_track:
        return None
    # With tracking, the ID occupies the third-from-last column.
    return self.data[:, -3]
@property
@lru_cache(maxsize=2)
def xyxyxyxy(self):
    """Rotated boxes as four (x, y) corner points with shape (N, 4, 2)."""
    # Delegates the xywhr -> 8-point corner conversion to the shared ops helper.
    return ops.xywhr2xyxyxyxy(self.xywhr)
@property @property
@lru_cache(maxsize=2) @lru_cache(maxsize=2)
def xyxyxyxyn(self): def xyxyxyxyn(self):
"""Return the boxes in xyxyxyxy format, (N, 4, 2).""" """Converts rotated bounding boxes to normalized xyxyxyxy format of shape (N, 4, 2)."""
xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy) xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
xyxyxyxyn[..., 0] /= self.orig_shape[1] xyxyxyxyn[..., 0] /= self.orig_shape[1]
xyxyxyxyn[..., 1] /= self.orig_shape[0] xyxyxyxyn[..., 1] /= self.orig_shape[0]
@ -739,9 +812,28 @@ class OBB(BaseTensor):
@lru_cache(maxsize=2) @lru_cache(maxsize=2)
def xyxy(self): def xyxy(self):
""" """
Return the horizontal boxes in xyxy format, (N, 4). Convert the oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format (x1, y1, x2, y2).
Accepts both torch and numpy boxes. Returns:
(torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (num_boxes, 4).
Example:
```python
import torch
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('path/to/image.jpg')
for result in results:
obb = result.obb
if obb is not None:
xyxy_boxes = obb.xyxy
# Do something with xyxy_boxes
```
Note:
This method is useful to perform operations that require axis-aligned bounding boxes, such as IoU
calculation with non-rotated boxes. The conversion approximates the OBB by the minimal enclosing rectangle.
""" """
x = self.xyxyxyxy[..., 0] x = self.xyxyxyxy[..., 0]
y = self.xyxyxyxy[..., 1] y = self.xyxyxyxy[..., 1]

View file

@ -1172,8 +1172,6 @@ class ClassifyMetrics(SimpleClass):
top1 (float): The top-1 accuracy. top1 (float): The top-1 accuracy.
top5 (float): The top-5 accuracy. top5 (float): The top-5 accuracy.
speed (Dict[str, float]): A dictionary containing the time taken for each step in the pipeline. speed (Dict[str, float]): A dictionary containing the time taken for each step in the pipeline.
Properties:
fitness (float): The fitness of the model, which is equal to top-5 accuracy. fitness (float): The fitness of the model, which is equal to top-5 accuracy.
results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness. results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness.
keys (List[str]): A list of keys for the results_dict. keys (List[str]): A list of keys for the results_dict.