diff --git a/.gitignore b/.gitignore index 1c0c5fbe..5cc365b4 100644 --- a/.gitignore +++ b/.gitignore @@ -130,7 +130,6 @@ venv.bak/ # mkdocs documentation /site -mkdocs_github_authors.yaml # mypy .mypy_cache/ @@ -140,8 +139,8 @@ dmypy.json # Pyre type checker .pyre/ -# datasets and projects -datasets/ +# datasets and projects (ignore /datasets dir at root only to allow for docs/en/datasets dir) +/datasets runs/ wandb/ .DS_Store diff --git a/docs/en/datasets/classify/caltech101.md b/docs/en/datasets/classify/caltech101.md index 7029c5e6..2462a167 100644 --- a/docs/en/datasets/classify/caltech101.md +++ b/docs/en/datasets/classify/caltech101.md @@ -110,11 +110,13 @@ To train an Ultralytics YOLO model on the Caltech-101 dataset, you can use the p # Start training from a pretrained *.pt model yolo classify train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416 ``` + For more detailed arguments and options, refer to the model [Training](../../modes/train.md) page. ### What are the key features of the Caltech-101 dataset? The Caltech-101 dataset includes: + - Around 9,000 color images across 101 categories. - Categories covering a diverse range of objects, including animals, vehicles, and household items. - Variable number of images per category, typically between 40 and 800. @@ -142,6 +144,7 @@ Citing the Caltech-101 dataset in your research acknowledges the creators' contr publisher={Elsevier} } ``` + Citing helps in maintaining the integrity of academic work and assists peers in locating the original resource. ### Can I use Ultralytics HUB for training models on the Caltech-101 dataset? diff --git a/docs/en/datasets/classify/cifar100.md b/docs/en/datasets/classify/cifar100.md index 722eccf9..2861c946 100644 --- a/docs/en/datasets/classify/cifar100.md +++ b/docs/en/datasets/classify/cifar100.md @@ -92,7 +92,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -104,7 +104,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 diff --git a/docs/en/datasets/classify/fashion-mnist.md b/docs/en/datasets/classify/fashion-mnist.md index 674e0858..2de2a805 100644 --- a/docs/en/datasets/classify/fashion-mnist.md +++ b/docs/en/datasets/classify/fashion-mnist.md @@ -102,7 +102,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -112,10 +112,10 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot # Train the model on Fashion-MNIST results = model.train(data="fashion-mnist", epochs=100, imgsz=28) ``` - + === "CLI" - + ```bash yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` diff --git a/docs/en/datasets/classify/imagenet.md b/docs/en/datasets/classify/imagenet.md index 6ec3f920..ae1ade9b 100644 --- a/docs/en/datasets/classify/imagenet.md +++ b/docs/en/datasets/classify/imagenet.md @@ -11,7 +11,7 @@ keywords: ImageNet, deep learning, visual recognition, computer vision, pretrain ## ImageNet Pretrained Models | Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | -|----------------------------------------------------------------------------------------------|-----------------------|------------------|------------------|--------------------------------|-------------------------------------|--------------------|--------------------------| +| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 | | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 | | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 | @@ -105,7 +105,7 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -117,7 +117,7 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 diff --git a/docs/en/datasets/classify/imagenette.md b/docs/en/datasets/classify/imagenette.md index aea183f3..fa06e0d3 100644 --- a/docs/en/datasets/classify/imagenette.md +++ b/docs/en/datasets/classify/imagenette.md @@ -152,12 +152,12 @@ The ImageNette dataset is advantageous for several reasons: - **Quick and Simple**: It contains only 10 classes, making it less complex and time-consuming compared to larger datasets. - **Educational Use**: Ideal for learning and teaching the basics of image classification since it requires less computational power and time. - **Versatility**: Widely used to train and benchmark various machine learning models, especially in image classification. - + For more details on model training and dataset management, explore the [Dataset Structure](#dataset-structure) section. ### Can the ImageNette dataset be used with different image sizes? -Yes, the ImageNette dataset is also available in two resized versions: ImageNette160 and ImageNette320. These versions help in faster prototyping and are especially useful when computational resources are limited. +Yes, the ImageNette dataset is also available in two resized versions: ImageNette160 and ImageNette320. These versions help in faster prototyping and are especially useful when computational resources are limited. !!! Example "Train Example with ImageNette160" @@ -174,7 +174,7 @@ Yes, the ImageNette dataset is also available in two resized versions: ImageNett ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model with ImageNette160 yolo detect train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 diff --git a/docs/en/datasets/classify/imagewoof.md b/docs/en/datasets/classify/imagewoof.md index 0d768b07..0f653745 100644 --- a/docs/en/datasets/classify/imagewoof.md +++ b/docs/en/datasets/classify/imagewoof.md @@ -112,17 +112,17 @@ To train a Convolutional Neural Network (CNN) model on the ImageWoof dataset usi !!! 
Example "Train Example" === "Python" - + ```python from ultralytics import YOLO model = YOLO("yolov8n-cls.pt") # Load a pretrained model results = model.train(data="imagewoof", epochs=100, imgsz=224) ``` - + === "CLI" - + ```bash yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 ``` diff --git a/docs/en/datasets/classify/index.md b/docs/en/datasets/classify/index.md index 58aaaecd..bc3d7191 100644 --- a/docs/en/datasets/classify/index.md +++ b/docs/en/datasets/classify/index.md @@ -197,7 +197,7 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL !!! Example === "Python" - + ```python from ultralytics import YOLO @@ -207,10 +207,10 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL # Train the model results = model.train(data="path/to/dataset", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/classify/mnist.md b/docs/en/datasets/classify/mnist.md index ae9be2bc..6fcf5bd4 100644 --- a/docs/en/datasets/classify/mnist.md +++ b/docs/en/datasets/classify/mnist.md @@ -98,7 +98,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -110,7 +110,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 diff --git a/docs/en/datasets/detect/african-wildlife.md b/docs/en/datasets/detect/african-wildlife.md index bdd392cd..fce0cf54 100644 --- a/docs/en/datasets/detect/african-wildlife.md +++ b/docs/en/datasets/detect/african-wildlife.md @@ -114,7 +114,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric !!! Example === "Python" - + ```python from ultralytics import YOLO @@ -126,7 +126,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/detect/argoverse.md b/docs/en/datasets/detect/argoverse.md index 56023b6b..c3a2c6e2 100644 --- a/docs/en/datasets/detect/argoverse.md +++ b/docs/en/datasets/detect/argoverse.md @@ -109,7 +109,7 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -119,10 +119,10 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config # Train the model results = model.train(data="Argoverse.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/detect/brain-tumor.md b/docs/en/datasets/detect/brain-tumor.md index 4ec217d5..38d69ff6 100644 --- a/docs/en/datasets/detect/brain-tumor.md +++ b/docs/en/datasets/detect/brain-tumor.md @@ -113,7 +113,7 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i !!! 
Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -123,10 +123,10 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i # Train the model results = model.train(data="brain-tumor.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640 @@ -157,7 +157,7 @@ Inference using a fine-tuned YOLOv8 model can be performed with either Python or ``` === "CLI" - + ```bash # Start prediction with a finetuned *.pt model yolo detect predict model='path/to/best.pt' imgsz=640 source="https://ultralytics.com/assets/brain-tumor-sample.jpg" diff --git a/docs/en/datasets/detect/coco.md b/docs/en/datasets/detect/coco.md index d3b0589e..b0d42b9b 100644 --- a/docs/en/datasets/detect/coco.md +++ b/docs/en/datasets/detect/coco.md @@ -22,7 +22,7 @@ The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is ## COCO Pretrained Models | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|--------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | @@ -127,7 +127,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -139,7 +139,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/detect/coco8.md b/docs/en/datasets/detect/coco8.md index c1df16ee..b2ed77b4 100644 --- a/docs/en/datasets/detect/coco8.md +++ b/docs/en/datasets/detect/coco8.md @@ -102,7 +102,7 @@ To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO diff --git a/docs/en/datasets/detect/globalwheat2020.md b/docs/en/datasets/detect/globalwheat2020.md index a8e255b5..8b8a0467 100644 --- a/docs/en/datasets/detect/globalwheat2020.md +++ b/docs/en/datasets/detect/globalwheat2020.md @@ -103,7 +103,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the follo !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO diff --git a/docs/en/datasets/detect/index.md b/docs/en/datasets/detect/index.md index 2b202544..e43cd461 100644 --- a/docs/en/datasets/detect/index.md +++ b/docs/en/datasets/detect/index.md @@ -16,20 +16,20 @@ The Ultralytics YOLO format is a dataset configuration format that allows you to ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco8 # dataset root dir -train: images/train # train images (relative to 'path') 4 images -val: images/val # val images (relative to 'path') 4 images -test: # test images (optional) +path: ../datasets/coco8 # dataset root dir +train: images/train # train images (relative to 'path') 4 images +val: images/val # val images (relative to 'path') 4 images +test: # test images (optional) # Classes (80 COCO classes) names: - 0: person - 1: bicycle - 2: car - # ... - 77: teddy bear - 78: hair drier - 79: toothbrush + 0: person + 1: bicycle + 2: car + # ... + 77: teddy bear + 78: hair drier + 79: toothbrush ``` Labels for this format should be exported to YOLO format with one `*.txt` file per image. If there are no objects in an image, no `*.txt` file is required. The `*.txt` file should be formatted with one row per object in `class x_center y_center width height` format. Box coordinates must be in **normalized xywh** format (from 0 to 1). 
If your boxes are in pixels, you should divide `x_center` and `width` by image width, and `y_center` and `height` by image height. Class numbers should be zero-indexed (start with 0). @@ -121,15 +121,15 @@ Remember to double-check if the dataset you want to use is compatible with your The Ultralytics YOLO format is a structured configuration for defining datasets in your training projects. It involves setting paths to your training, validation, and testing images and corresponding labels. For example: ```yaml -path: ../datasets/coco8 # dataset root directory -train: images/train # training images (relative to 'path') -val: images/val # validation images (relative to 'path') -test: # optional test images +path: ../datasets/coco8 # dataset root directory +train: images/train # training images (relative to 'path') +val: images/val # validation images (relative to 'path') +test: # optional test images names: - 0: person - 1: bicycle - 2: car - # ... + 0: person + 1: bicycle + 2: car + # ... ``` Labels are saved in `*.txt` files with one file per image, formatted as `class x_center y_center width height` with normalized coordinates. For a detailed guide, see the [COCO8 dataset example](coco8.md). @@ -167,7 +167,7 @@ To start training a YOLOv8 model, ensure your dataset is formatted correctly and !!! Example === "Python" - + ```python from ultralytics import YOLO @@ -176,7 +176,7 @@ To start training a YOLOv8 model, ensure your dataset is formatted correctly and ``` === "CLI" - + ```bash yolo detect train data=path/to/your_dataset.yaml model=yolov8n.pt epochs=100 imgsz=640 ``` diff --git a/docs/en/datasets/detect/lvis.md b/docs/en/datasets/detect/lvis.md index 2cf6a0c2..afc91fc6 100644 --- a/docs/en/datasets/detect/lvis.md +++ b/docs/en/datasets/detect/lvis.md @@ -121,7 +121,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -131,10 +131,10 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o # Train the model results = model.train(data="lvis.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=lvis.yaml model=yolov8n.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/detect/objects365.md b/docs/en/datasets/detect/objects365.md index a3ffcbe8..ea95798f 100644 --- a/docs/en/datasets/detect/objects365.md +++ b/docs/en/datasets/detect/objects365.md @@ -127,6 +127,7 @@ Refer to the [Training](../../modes/train.md) page for a comprehensive list of a ### Why should I use the Objects365 dataset for my object detection projects? The Objects365 dataset offers several advantages for object detection tasks: + 1. **Diversity**: It includes 2 million images with objects in diverse scenarios, covering 365 categories. 2. **High-quality Annotations**: Over 30 million bounding boxes provide comprehensive ground truth data. 3. **Performance**: Models pre-trained on Objects365 significantly outperform those trained on datasets like ImageNet, leading to better generalization. diff --git a/docs/en/datasets/detect/open-images-v7.md b/docs/en/datasets/detect/open-images-v7.md index 41e40d76..92958773 100644 --- a/docs/en/datasets/detect/open-images-v7.md +++ b/docs/en/datasets/detect/open-images-v7.md @@ -22,7 +22,7 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object ## Open Images V7 Pretrained Models | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|-------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | @@ -141,10 +141,9 @@ Open Images V7 is an extensive and versatile dataset created by Google, designed To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python and CLI commands. Here's an example of training the YOLOv8n model for 100 epochs with an image size of 640: !!! Example "Train Example" - === "Python" - + ```python from ultralytics import YOLO @@ -154,10 +153,10 @@ To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python a # Train the model on the Open Images V7 dataset results = model.train(data="open-images-v7.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640 @@ -168,6 +167,7 @@ For more details on arguments and settings, refer to the [Training](../../modes/ ### What are some key features of the Open Images V7 dataset? The Open Images V7 dataset includes approximately 9 million images with various annotations: + - **Bounding Boxes**: 16 million bounding boxes across 600 object classes. - **Segmentation Masks**: Masks for 2.8 million objects across 350 classes. - **Visual Relationships**: 3.3 million annotations indicating relationships, properties, and actions. @@ -179,17 +179,18 @@ The Open Images V7 dataset includes approximately 9 million images with various Ultralytics provides several YOLOv8 pretrained models for the Open Images V7 dataset, each with different sizes and performance metrics: -| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|-------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | +| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +| ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | ### What applications can the Open Images V7 dataset be used for? The Open Images V7 dataset supports a variety of computer vision tasks including: + - **Image Classification** - **Object Detection** - **Instance Segmentation** diff --git a/docs/en/datasets/detect/roboflow-100.md b/docs/en/datasets/detect/roboflow-100.md index 253b640f..870ecb84 100644 --- a/docs/en/datasets/detect/roboflow-100.md +++ b/docs/en/datasets/detect/roboflow-100.md @@ -142,7 +142,7 @@ To use the Roboflow 100 dataset for benchmarking, you can implement the RF100Ben !!! Example "Benchmarking example" === "Python" - + ```python import os import shutil diff --git a/docs/en/datasets/detect/sku-110k.md b/docs/en/datasets/detect/sku-110k.md index a16c4153..b307e597 100644 --- a/docs/en/datasets/detect/sku-110k.md +++ b/docs/en/datasets/detect/sku-110k.md @@ -116,7 +116,7 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an ex !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -126,10 +126,10 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an ex # Train the model results = model.train(data="SKU-110K.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/detect/visdrone.md b/docs/en/datasets/detect/visdrone.md index 88ddcb7c..4473bd2d 100644 --- a/docs/en/datasets/detect/visdrone.md +++ b/docs/en/datasets/detect/visdrone.md @@ -107,6 +107,7 @@ We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning an ### What is the VisDrone Dataset and what are its key features? The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-scale benchmark created by the AISKYEYE team at Tianjin University, China. It is designed for various computer vision tasks related to drone-based image and video analysis. Key features include: + - **Composition**: 288 video clips with 261,908 frames and 10,209 static images. - **Annotations**: Over 2.6 million bounding boxes for objects like pedestrians, cars, bicycles, and tricycles. - **Diversity**: Collected across 14 cities, in urban and rural settings, under different weather and lighting conditions. @@ -119,7 +120,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz !!! 
Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -131,7 +132,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640 @@ -142,6 +143,7 @@ For additional configuration options, please refer to the model [Training](../.. ### What are the main subsets of the VisDrone dataset and their applications? The VisDrone dataset is divided into five main subsets, each tailored for a specific computer vision task: + 1. **Task 1**: Object detection in images. 2. **Task 2**: Object detection in videos. 3. **Task 3**: Single-object tracking. diff --git a/docs/en/datasets/detect/xview.md b/docs/en/datasets/detect/xview.md index 051ade1d..53afa195 100644 --- a/docs/en/datasets/detect/xview.md +++ b/docs/en/datasets/detect/xview.md @@ -109,7 +109,7 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -119,10 +119,10 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps # Train the model results = model.train(data="xView.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640 @@ -133,6 +133,7 @@ For detailed arguments and settings, refer to the model [Training](../../modes/t ### What are the key features of the xView dataset? The xView dataset stands out due to its comprehensive set of features: + - Over 1 million object instances across 60 distinct classes. - High-resolution imagery at 0.3 meters. - Diverse object types including small, rare, and fine-grained objects, all annotated with bounding boxes. @@ -160,5 +161,5 @@ If you utilize the xView dataset in your research, please cite the following pap primaryClass={cs.CV} } ``` - + For more information about the xView dataset, visit the official [xView dataset website](http://xviewdataset.org/). diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md index 7dbc003a..87a09d19 100644 --- a/docs/en/datasets/explorer/api.md +++ b/docs/en/datasets/explorer/api.md @@ -342,14 +342,17 @@ The Ultralytics Explorer API is designed for comprehensive dataset exploration. ### How do I install the Ultralytics Explorer API? To install the Ultralytics Explorer API along with its dependencies, use the following command: + ```bash pip install ultralytics[explorer] ``` + This will automatically install all necessary external libraries for the Explorer API functionality. For additional setup details, refer to the [installation section](#installation) of our documentation. ### How can I use the Ultralytics Explorer API for similarity search? You can use the Ultralytics Explorer API to perform similarity searches by creating an embeddings table and querying it for similar images. Here's a basic example: + ```python from ultralytics import Explorer @@ -361,6 +364,7 @@ explorer.create_embeddings_table() similar_images_df = explorer.get_similar(img="path/to/image.jpg") print(similar_images_df.head()) ``` + For more details, please visit the [Similarity Search section](#1-similarity-search). ### What are the benefits of using LanceDB with Ultralytics Explorer? 
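LanceDB is the embedded vector database behind every Explorer embeddings table, so beyond the high-level `get_similar` and `sql_query` helpers you can drop down to the raw table for custom vector queries. A minimal sketch of that workflow, mirroring the `Explorer.table` and `table.search` cells in the notebook later in this diff (the dataset and model names are illustrative):

```python
from ultralytics import Explorer

# Build (or reuse) the embeddings table for a dataset/model pair
explorer = Explorer(data="coco128.yaml", model="yolov8n.pt")
explorer.create_embeddings_table()

# Access the underlying LanceDB table directly
table = explorer.table
print(table.schema)

# Raw K-nearest-neighbour search against an embedding vector (L2 distance by default)
query_vector = [0.0] * 256  # placeholder embedding, sized to match the notebook's 256-dim example
print(table.search(query_vector).limit(5).to_pandas().head())
```

Because the table is plain LanceDB, it also converts to pandas or Arrow (`table.to_pandas()`, `table.to_arrow()`) for downstream analysis.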
diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md index ebf4a1b9..ed5760be 100644 --- a/docs/en/datasets/explorer/dashboard.md +++ b/docs/en/datasets/explorer/dashboard.md @@ -40,11 +40,13 @@ Semantic search is a technique for finding similar images to a given image. It i For example: In this VOC Exploration dashboard, user selects a couple airplane images like this: +

Explorer Dashboard Screenshot 2

On performing similarity search, you should see a similar result: +

Explorer Dashboard Screenshot 3

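The same similarity search is available programmatically through the Explorer API; a minimal sketch, mirroring the `get_similar` and `plot_similar` calls shown elsewhere in this diff (dataset and model names are illustrative):

```python
from ultralytics import Explorer

explorer = Explorer(data="VOC.yaml", model="yolov8n.pt")
explorer.create_embeddings_table()

# Query by index into the dataset; returns a pandas DataFrame of the closest matches
similar_df = explorer.get_similar(idx=1, limit=10)
print(similar_df.head())

# Or plot matches for an external image directly
explorer.plot_similar(img="https://ultralytics.com/images/bus.jpg", limit=10, labels=False)
```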
@@ -52,6 +54,7 @@ On performing similarity search, you should see a similar result: ## Ask AI This allows you to write how you want to filter your dataset using natural language. You don't have to be proficient in writing SQL queries. Our AI powered query generator will automatically do that under the hood. For example - you can say - "show me 100 images with exactly one person and 2 dogs. There can be other objects too" and it'll internally generate the query and show you those results. Here's an example output when asked to "Show 10 images with exactly 5 persons" and you'll see a result like this: +

Explorer Dashboard Screenshot 4

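The Ask AI box also has a programmatic counterpart in the Explorer API; a minimal sketch, mirroring the `ask_ai` cell in the notebook later in this diff (dataset, model, and prompt are illustrative):

```python
from ultralytics import Explorer

explorer = Explorer(data="VOC.yaml", model="yolov8n.pt")
explorer.create_embeddings_table()

# Natural-language filtering; the LLM-generated query can fail, in which case None is returned
df = explorer.ask_ai("show me 10 images with exactly 5 persons")
print(df.head() if df is not None else "query could not be generated")
```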
@@ -76,7 +79,7 @@ This is a Demo build using the Explorer API. You can use the API to build your o ### What is Ultralytics Explorer GUI and how do I install it? -Ultralytics Explorer GUI is a powerful interface that unlocks advanced data exploration capabilities using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by Large Language Models (LLMs). +Ultralytics Explorer GUI is a powerful interface that unlocks advanced data exploration capabilities using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by Large Language Models (LLMs). To install the Explorer GUI, you can use pip: @@ -106,13 +109,14 @@ Ultralytics Explorer GUI allows you to run SQL queries directly on your dataset WHERE labels LIKE '%person%' AND labels LIKE '%dog%' ``` -You can also provide only the WHERE clause, making the querying process more flexible. +You can also provide only the WHERE clause, making the querying process more flexible. For more details, refer to the [SQL Queries Section](#run-sql-queries-on-your-cv-datasets). ### What are the benefits of using Ultralytics Explorer GUI for data exploration? Ultralytics Explorer GUI enhances data exploration with features like semantic search, SQL querying, and natural language interactions through the Ask AI feature. These capabilities allow users to: + - Efficiently find visually similar images. - Filter datasets using complex SQL queries. - Utilize AI to perform natural language searches, eliminating the need for advanced SQL expertise. diff --git a/docs/en/datasets/explorer/explorer.ipynb b/docs/en/datasets/explorer/explorer.ipynb index 9c988f43..fbca0173 100644 --- a/docs/en/datasets/explorer/explorer.ipynb +++ b/docs/en/datasets/explorer/explorer.ipynb @@ -1,601 +1,604 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "aa923c26-81c8-4565-9277-1cb686e3702e", - "metadata": { - "id": "aa923c26-81c8-4565-9277-1cb686e3702e" - }, - "source": [ - "# VOC Exploration Example\n", - "
\n", - "\n", - " \n", - " \n", - "\n", - " [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n", - "\n", - " \"Run\n", - " \"Open\n", - " \"Open\n", - "\n", - "Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets using with the power of semantic search. You can utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.\n", - "\n", - "We hope that the resources in this notebook will help you get the most out of Ultralytics. Please browse the Explorer Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", - "\n", - "Try `yolo explorer` powered by Exlorer API\n", - "\n", - "Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f", - "metadata": { - "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f" - }, - "source": [ - "## Setup\n", - "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41", - "metadata": { - "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41" - }, - "outputs": [], - "source": [ - "%pip install ultralytics[explorer] openai\n", - "import ultralytics\n", - "ultralytics.checks()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae602549-3419-4909-9f82-35cba515483f", - "metadata": { - "id": "ae602549-3419-4909-9f82-35cba515483f" - }, - "outputs": [], - "source": [ - "from ultralytics import Explorer" - ] - }, - { - "cell_type": "markdown", - "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c", - "metadata": { - "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c" - }, - "source": [ - "## Similarity search\n", - "Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once and it is reused automatically.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "334619da-6deb-4b32-9fe0-74e0a79cee20", - "metadata": { - "id": "334619da-6deb-4b32-9fe0-74e0a79cee20" - }, - "outputs": [], - "source": [ - "exp = Explorer(\"VOC.yaml\", model=\"yolov8n.pt\")\n", - "exp.create_embeddings_table()" - ] - }, - { - "cell_type": "markdown", - "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d", - "metadata": { - "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d" - }, - "source": [ - "One the embeddings table is built, you can get run semantic search in any of the following ways:\n", - "- On a given index / list of indices in the dataset like - `exp.get_similar(idx=[1,10], limit=10)`\n", - "- On any image/ list of images not in the dataset - `exp.get_similar(img=[\"path/to/img1\", \"path/to/img2\"], limit=10)`\n", - "In case of multiple inputs, the aggregade of their embeddings is used.\n", - "\n", - "You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b485f05b-d92d-42bc-8da7-5e361667b341", - "metadata": { - "id": "b485f05b-d92d-42bc-8da7-5e361667b341" - }, - "outputs": [], - "source": [ - "similar = exp.get_similar(idx=1, limit=10)\n", - "similar.head()" - ] - }, - { - "cell_type": "markdown", - "id": "acf4b489-2161-4176-a1fe-d1d067d8083d", - "metadata": { - "id": "acf4b489-2161-4176-a1fe-d1d067d8083d" - }, - "source": [ - "You can use the also plot the similar samples directly using the `plot_similar` util\n", - "

\n", - "\n", - " \n", - "

\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7", - "metadata": { - "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7" - }, - "outputs": [], - "source": [ - "exp.plot_similar(idx=6500, limit=20)\n", - "#exp.plot_similar(idx=[100,101], limit=10) # Can also pass list of idxs or imgs\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d", - "metadata": { - "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d" - }, - "outputs": [], - "source": [ - "exp.plot_similar(img=\"https://ultralytics.com/images/bus.jpg\", limit=10, labels=False) # Can also pass any external images\n" - ] - }, - { - "cell_type": "markdown", - "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a", - "metadata": { - "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a" - }, - "source": [ - "

\n", - "\n", - "\n", - "

" - ] - }, - { - "cell_type": "markdown", - "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553", - "metadata": { - "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553" - }, - "source": [ - "## 2. Ask AI: Search or filter with Natural Language\n", - "You can prompt the Explorer object with the kind of data points you want to see and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.\n", - "

\n", - "\"Screenshot\n", - "\n", - "

\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c", - "metadata": { - "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c" - }, - "outputs": [], - "source": [ - "df = exp.ask_ai(\"show me images containing more than 10 objects with at least 2 persons\")\n", - "df.head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f", - "metadata": { - "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f" - }, - "source": [ - "for plotting these results you can use `plot_query_result` util\n", - "Example:\n", - "```\n", - "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", - "Image.fromarray(plt)\n", - "```\n", - "

\n", - " \n", - "\n", - "

" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511", - "metadata": { - "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511" - }, - "outputs": [], - "source": [ - "# plot\n", - "from ultralytics.data.explorer import plot_query_result\n", - "from PIL import Image\n", - "\n", - "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", - "Image.fromarray(plt)" - ] - }, - { - "cell_type": "markdown", - "id": "35315ae6-d827-40e4-8813-279f97a83b34", - "metadata": { - "id": "35315ae6-d827-40e4-8813-279f97a83b34" - }, - "source": [ - "## 3. Run SQL queries on your Dataset!\n", - "Sometimes you might want to investigate a certain type of entries in your dataset. For this Explorer allows you to execute SQL queries.\n", - "It accepts either of the formats:\n", - "- Queries beginning with \"WHERE\" will automatically select all columns. This can be thought of as a short-hand query\n", - "- You can also write full queries where you can specify which columns to select\n", - "\n", - "This can be used to investigate model performance and specific data points. For example:\n", - "- let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog.\n", - "\n", - "You can combine SQL query and semantic search to filter down to specific type of results\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf", - "metadata": { - "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf" - }, - "outputs": [], - "source": [ - "table = exp.sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\")\n", - "table" - ] - }, - { - "cell_type": "markdown", - "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab", - "metadata": { - "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab" - }, - "source": [ - "Just like similarity search, you also get a util to directly plot the sql queries using `exp.plot_sql_query`\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18b977e7-d048-4b22-b8c4-084a03b04f23", - "metadata": { - "id": "18b977e7-d048-4b22-b8c4-084a03b04f23" - }, - "outputs": [], - "source": [ - "exp.plot_sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\", labels=True)" - ] - }, - { - "cell_type": "markdown", - "id": "f26804c5-840b-4fd1-987f-e362f29e3e06", - "metadata": { - "id": "f26804c5-840b-4fd1-987f-e362f29e3e06" - }, - "source": [ - "## 3. Working with embeddings Table (Advanced)\n", - "Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre and post filters, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a", - "metadata": { - "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a" - }, - "outputs": [], - "source": [ - "table = exp.table\n", - "table.schema" - ] - }, - { - "cell_type": "markdown", - "id": "238db292-8610-40b3-9af7-dfd6be174892", - "metadata": { - "id": "238db292-8610-40b3-9af7-dfd6be174892" - }, - "source": [ - "### Run raw queries\n", - "Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. 
In LLM and other AI applications, each data point can be presented by the embeddings generated from some models, it returns the most relevant features.\n", - "\n", - "A search in high-dimensional vector space, is to find K-Nearest-Neighbors (KNN) of the query vector.\n", - "\n", - "Metric\n", - "In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:\n", - "- L2\n", - "- Cosine\n", - "- Dot\n", - "Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/)\n", - "\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d74430fe-5aee-45a1-8863-3f2c31338792", - "metadata": { - "id": "d74430fe-5aee-45a1-8863-3f2c31338792" - }, - "outputs": [], - "source": [ - "dummy_img_embedding = [i for i in range(256)]\n", - "table.search(dummy_img_embedding).limit(5).to_pandas()" - ] - }, - { - "cell_type": "markdown", - "id": "587486b4-0d19-4214-b994-f032fb2e8eb5", - "metadata": { - "id": "587486b4-0d19-4214-b994-f032fb2e8eb5" - }, - "source": [ - "### Inter-conversion to popular data formats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c", - "metadata": { - "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c" - }, - "outputs": [], - "source": [ - "df = table.to_pandas()\n", - "pa_table = table.to_arrow()\n" - ] - }, - { - "cell_type": "markdown", - "id": "42659d63-ad76-49d6-8dfc-78d77278db72", - "metadata": { - "id": "42659d63-ad76-49d6-8dfc-78d77278db72" - }, - "source": [ - "### Work with Embeddings\n", - "You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca", - "metadata": { - "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "embeddings = table.to_pandas()[\"vector\"].tolist()\n", - "embeddings = np.array(embeddings)" - ] - }, - { - "cell_type": "markdown", - "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602", - "metadata": { - "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602" - }, - "source": [ - "### Scatterplot\n", - "One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. 
Let's try an example\n", - "\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8", - "metadata": { - "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8" - }, - "outputs": [], - "source": [ - "!pip install scikit-learn --q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "196079c3-45a9-4325-81ab-af79a881e37a", - "metadata": { - "id": "196079c3-45a9-4325-81ab-af79a881e37a" - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import numpy as np\n", - "from sklearn.decomposition import PCA\n", - "import matplotlib.pyplot as plt\n", - "from mpl_toolkits.mplot3d import Axes3D\n", - "\n", - "# Reduce dimensions using PCA to 3 components for visualization in 3D\n", - "pca = PCA(n_components=3)\n", - "reduced_data = pca.fit_transform(embeddings)\n", - "\n", - "# Create a 3D scatter plot using Matplotlib's Axes3D\n", - "fig = plt.figure(figsize=(8, 6))\n", - "ax = fig.add_subplot(111, projection='3d')\n", - "\n", - "# Scatter plot\n", - "ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)\n", - "ax.set_title('3D Scatter Plot of Reduced 256-Dimensional Data (PCA)')\n", - "ax.set_xlabel('Component 1')\n", - "ax.set_ylabel('Component 2')\n", - "ax.set_zlabel('Component 3')\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "1c843c23-e3f2-490e-8d6c-212fa038a149", - "metadata": { - "id": "1c843c23-e3f2-490e-8d6c-212fa038a149" - }, - "source": [ - "## 4. Similarity Index\n", - "Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation-\n", - "* It tries to estimate how similar each data point is with the rest of the dataset.\n", - "* It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.\n", - "\n", - "For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case, your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.\n", - "Similar to vector and SQL search, this also comes with a util to directly plot it. 
Let's look at the plot first\n", - "\"Screenshot\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc", - "metadata": { - "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc" - }, - "outputs": [], - "source": [ - "exp.plot_similarity_index(max_dist=0.2, top_k=0.01)" - ] - }, - { - "cell_type": "markdown", - "id": "28228a9a-b727-45b5-8ca7-8db662c0b937", - "metadata": { - "id": "28228a9a-b727-45b5-8ca7-8db662c0b937" - }, - "source": [ - "Now let's look at the output of the operation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a", - "metadata": { - "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d", - "metadata": { - "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d" - }, - "outputs": [], - "source": [ - "sim_idx" - ] - }, - { - "cell_type": "markdown", - "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4", - "metadata": { - "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4" - }, - "source": [ - "Let's create a query to see what data points have similarity count of more than 30 and plot images similar to them." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58d2557b-d401-43cf-937d-4f554c7bc808", - "metadata": { - "id": "58d2557b-d401-43cf-937d-4f554c7bc808" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "sim_count = np.array(sim_idx[\"count\"])\n", - "sim_idx['im_file'][sim_count > 30]" - ] - }, - { - "cell_type": "markdown", - "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e", - "metadata": { - "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e" - }, - "source": [ - "You should see something like this\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2", - "metadata": { - "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2" - }, - "outputs": [], - "source": [ - "exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - }, - "colab": { - "provenance": [] - } + "cells": [ + { + "cell_type": "markdown", + "id": "aa923c26-81c8-4565-9277-1cb686e3702e", + "metadata": { + "id": "aa923c26-81c8-4565-9277-1cb686e3702e" + }, + "source": [ + "# VOC Exploration Example\n", + "
\n",
    "\n",
    "  \n",
    "    \n",
    "\n",
    "  [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
    "\n",
    "  \"Run\n",
    "  \"Open\n",
    "  \"Open\n",
    "\n",
    "Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets using the power of semantic search. You can use utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.\n",
    "\n",
    "We hope that the resources in this notebook will help you get the most out of Ultralytics. Please browse the Explorer Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
    "\n",
    "Try `yolo explorer` powered by Explorer API\n",
    "\n",
    "Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!\n",
    "\n",
    "
" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "markdown", + "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f", + "metadata": { + "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f" + }, + "source": [ + "## Setup\n", + "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41", + "metadata": { + "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41" + }, + "outputs": [], + "source": [ + "%pip install ultralytics[explorer] openai\n", + "import ultralytics\n", + "\n", + "ultralytics.checks()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae602549-3419-4909-9f82-35cba515483f", + "metadata": { + "id": "ae602549-3419-4909-9f82-35cba515483f" + }, + "outputs": [], + "source": [ + "from ultralytics import Explorer" + ] + }, + { + "cell_type": "markdown", + "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c", + "metadata": { + "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c" + }, + "source": [ + "## Similarity search\n", + "Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once and it is reused automatically.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334619da-6deb-4b32-9fe0-74e0a79cee20", + "metadata": { + "id": "334619da-6deb-4b32-9fe0-74e0a79cee20" + }, + "outputs": [], + "source": [ + "exp = Explorer(\"VOC.yaml\", model=\"yolov8n.pt\")\n", + "exp.create_embeddings_table()" + ] + }, + { + "cell_type": "markdown", + "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d", + "metadata": { + "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d" + }, + "source": [ + "One the embeddings table is built, you can get run semantic search in any of the following ways:\n", + "- On a given index / list of indices in the dataset like - `exp.get_similar(idx=[1,10], limit=10)`\n", + "- On any image/ list of images not in the dataset - `exp.get_similar(img=[\"path/to/img1\", \"path/to/img2\"], limit=10)`\n", + "In case of multiple inputs, the aggregade of their embeddings is used.\n", + "\n", + "You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering\n", + "\"Screenshot\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b485f05b-d92d-42bc-8da7-5e361667b341", + "metadata": { + "id": "b485f05b-d92d-42bc-8da7-5e361667b341" + }, + "outputs": [], + "source": [ + "similar = exp.get_similar(idx=1, limit=10)\n", + "similar.head()" + ] + }, + { + "cell_type": "markdown", + "id": "acf4b489-2161-4176-a1fe-d1d067d8083d", + "metadata": { + "id": "acf4b489-2161-4176-a1fe-d1d067d8083d" + }, + "source": [ + "You can use the also plot the similar samples directly using the `plot_similar` util\n", + "

\n", + "\n", + " \n", + "

\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7", + "metadata": { + "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7" + }, + "outputs": [], + "source": [ + "exp.plot_similar(idx=6500, limit=20)\n", + "# exp.plot_similar(idx=[100,101], limit=10) # Can also pass list of idxs or imgs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d", + "metadata": { + "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d" + }, + "outputs": [], + "source": [ + "exp.plot_similar(\n", + " img=\"https://ultralytics.com/images/bus.jpg\", limit=10, labels=False\n", + ") # Can also pass any external images" + ] + }, + { + "cell_type": "markdown", + "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a", + "metadata": { + "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a" + }, + "source": [ + "

\n", + "\n", + "\n", + "

" + ] + }, + { + "cell_type": "markdown", + "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553", + "metadata": { + "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553" + }, + "source": [ + "## 2. Ask AI: Search or filter with Natural Language\n", + "You can prompt the Explorer object with the kind of data points you want to see and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.\n", + "

\n", + "\"Screenshot\n", + "\n", + "

\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c", + "metadata": { + "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c" + }, + "outputs": [], + "source": [ + "df = exp.ask_ai(\"show me images containing more than 10 objects with at least 2 persons\")\n", + "df.head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f", + "metadata": { + "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f" + }, + "source": [ + "for plotting these results you can use `plot_query_result` util\n", + "Example:\n", + "```\n", + "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", + "Image.fromarray(plt)\n", + "```\n", + "

\n", + " \n", + "\n", + "

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511", + "metadata": { + "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511" + }, + "outputs": [], + "source": [ + "# plot\n", + "from PIL import Image\n", + "\n", + "from ultralytics.data.explorer import plot_query_result\n", + "\n", + "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", + "Image.fromarray(plt)" + ] + }, + { + "cell_type": "markdown", + "id": "35315ae6-d827-40e4-8813-279f97a83b34", + "metadata": { + "id": "35315ae6-d827-40e4-8813-279f97a83b34" + }, + "source": [ + "## 3. Run SQL queries on your Dataset!\n", + "Sometimes you might want to investigate a certain type of entries in your dataset. For this Explorer allows you to execute SQL queries.\n", + "It accepts either of the formats:\n", + "- Queries beginning with \"WHERE\" will automatically select all columns. This can be thought of as a short-hand query\n", + "- You can also write full queries where you can specify which columns to select\n", + "\n", + "This can be used to investigate model performance and specific data points. For example:\n", + "- let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog.\n", + "\n", + "You can combine SQL query and semantic search to filter down to specific type of results\n", + "\"Screenshot\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf", + "metadata": { + "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf" + }, + "outputs": [], + "source": [ + "table = exp.sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\")\n", + "table" + ] + }, + { + "cell_type": "markdown", + "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab", + "metadata": { + "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab" + }, + "source": [ + "Just like similarity search, you also get a util to directly plot the sql queries using `exp.plot_sql_query`\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18b977e7-d048-4b22-b8c4-084a03b04f23", + "metadata": { + "id": "18b977e7-d048-4b22-b8c4-084a03b04f23" + }, + "outputs": [], + "source": [ + "exp.plot_sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\", labels=True)" + ] + }, + { + "cell_type": "markdown", + "id": "f26804c5-840b-4fd1-987f-e362f29e3e06", + "metadata": { + "id": "f26804c5-840b-4fd1-987f-e362f29e3e06" + }, + "source": [ + "## 3. Working with embeddings Table (Advanced)\n", + "Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre and post filters, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a", + "metadata": { + "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a" + }, + "outputs": [], + "source": [ + "table = exp.table\n", + "table.schema" + ] + }, + { + "cell_type": "markdown", + "id": "238db292-8610-40b3-9af7-dfd6be174892", + "metadata": { + "id": "238db292-8610-40b3-9af7-dfd6be174892" + }, + "source": [ + "### Run raw queries\n", + "Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. 
In LLM and other AI applications, each data point can be presented by the embeddings generated from some models, it returns the most relevant features.\n", + "\n", + "A search in high-dimensional vector space, is to find K-Nearest-Neighbors (KNN) of the query vector.\n", + "\n", + "Metric\n", + "In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:\n", + "- L2\n", + "- Cosine\n", + "- Dot\n", + "Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/)\n", + "\n", + "\"Screenshot\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74430fe-5aee-45a1-8863-3f2c31338792", + "metadata": { + "id": "d74430fe-5aee-45a1-8863-3f2c31338792" + }, + "outputs": [], + "source": [ + "dummy_img_embedding = [i for i in range(256)]\n", + "table.search(dummy_img_embedding).limit(5).to_pandas()" + ] + }, + { + "cell_type": "markdown", + "id": "587486b4-0d19-4214-b994-f032fb2e8eb5", + "metadata": { + "id": "587486b4-0d19-4214-b994-f032fb2e8eb5" + }, + "source": [ + "### Inter-conversion to popular data formats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c", + "metadata": { + "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c" + }, + "outputs": [], + "source": [ + "df = table.to_pandas()\n", + "pa_table = table.to_arrow()" + ] + }, + { + "cell_type": "markdown", + "id": "42659d63-ad76-49d6-8dfc-78d77278db72", + "metadata": { + "id": "42659d63-ad76-49d6-8dfc-78d77278db72" + }, + "source": [ + "### Work with Embeddings\n", + "You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca", + "metadata": { + "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "embeddings = table.to_pandas()[\"vector\"].tolist()\n", + "embeddings = np.array(embeddings)" + ] + }, + { + "cell_type": "markdown", + "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602", + "metadata": { + "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602" + }, + "source": [ + "### Scatterplot\n", + "One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. 
Let's try an example\n", + "\n", + "\"Screenshot\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8", + "metadata": { + "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8" + }, + "outputs": [], + "source": [ + "!pip install scikit-learn --q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "196079c3-45a9-4325-81ab-af79a881e37a", + "metadata": { + "id": "196079c3-45a9-4325-81ab-af79a881e37a" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.decomposition import PCA\n", + "\n", + "# Reduce dimensions using PCA to 3 components for visualization in 3D\n", + "pca = PCA(n_components=3)\n", + "reduced_data = pca.fit_transform(embeddings)\n", + "\n", + "# Create a 3D scatter plot using Matplotlib's Axes3D\n", + "fig = plt.figure(figsize=(8, 6))\n", + "ax = fig.add_subplot(111, projection=\"3d\")\n", + "\n", + "# Scatter plot\n", + "ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)\n", + "ax.set_title(\"3D Scatter Plot of Reduced 256-Dimensional Data (PCA)\")\n", + "ax.set_xlabel(\"Component 1\")\n", + "ax.set_ylabel(\"Component 2\")\n", + "ax.set_zlabel(\"Component 3\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "1c843c23-e3f2-490e-8d6c-212fa038a149", + "metadata": { + "id": "1c843c23-e3f2-490e-8d6c-212fa038a149" + }, + "source": [ + "## 4. Similarity Index\n", + "Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation-\n", + "* It tries to estimate how similar each data point is with the rest of the dataset.\n", + "* It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.\n", + "\n", + "For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case, your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.\n", + "Similar to vector and SQL search, this also comes with a util to directly plot it. 
Let's look at the plot first\n", + "\"Screenshot\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc", + "metadata": { + "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc" + }, + "outputs": [], + "source": [ + "exp.plot_similarity_index(max_dist=0.2, top_k=0.01)" + ] + }, + { + "cell_type": "markdown", + "id": "28228a9a-b727-45b5-8ca7-8db662c0b937", + "metadata": { + "id": "28228a9a-b727-45b5-8ca7-8db662c0b937" + }, + "source": [ + "Now let's look at the output of the operation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a", + "metadata": { + "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d", + "metadata": { + "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d" + }, + "outputs": [], + "source": [ + "sim_idx" + ] + }, + { + "cell_type": "markdown", + "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4", + "metadata": { + "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4" + }, + "source": [ + "Let's create a query to see what data points have similarity count of more than 30 and plot images similar to them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58d2557b-d401-43cf-937d-4f554c7bc808", + "metadata": { + "id": "58d2557b-d401-43cf-937d-4f554c7bc808" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "sim_count = np.array(sim_idx[\"count\"])\n", + "sim_idx[\"im_file\"][sim_count > 30]" + ] + }, + { + "cell_type": "markdown", + "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e", + "metadata": { + "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e" + }, + "source": [ + "You should see something like this\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2", + "metadata": { + "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2" + }, + "outputs": [], + "source": [ + "exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md index eab1b49d..97603b42 100644 --- a/docs/en/datasets/index.md +++ b/docs/en/datasets/index.md @@ -155,6 +155,7 @@ By following these steps, you can contribute a new dataset that integrates well ### What datasets does Ultralytics support for object detection? Ultralytics supports a wide variety of datasets for object detection, including: + - [COCO](detect/coco.md): A large-scale object detection, segmentation, and captioning dataset with 80 object categories. - [LVIS](detect/lvis.md): An extensive dataset with 1203 object categories, designed for more fine-grained object detection and segmentation. - [Argoverse](detect/argoverse.md): A dataset containing 3D tracking and motion forecasting data from urban environments with rich annotations. 
@@ -166,6 +167,7 @@ These datasets facilitate training robust models for various object detection ap ### How do I contribute a new dataset to Ultralytics? Contributing a new dataset involves several steps: + 1. **Collect Images**: Gather images from public databases or personal collections. 2. **Annotate Images**: Apply bounding boxes, segments, or keypoints, depending on the task. 3. **Export Annotations**: Convert annotations into the YOLO `*.txt` format. @@ -180,6 +182,7 @@ Visit [Contribute New Datasets](#contribute-new-datasets) for a comprehensive gu ### Why should I use Ultralytics Explorer for my dataset? Ultralytics Explorer offers powerful features for dataset analysis, including: + - **Embeddings Generation**: Create vector embeddings for images. - **Semantic Search**: Search for similar images using embeddings or AI. - **SQL Queries**: Run advanced SQL queries for detailed data analysis. @@ -190,6 +193,7 @@ Explore the [Ultralytics Explorer](explorer/index.md) for more information and t ### What are the unique features of Ultralytics YOLO models for computer vision? Ultralytics YOLO models provide several unique features: + - **Real-time Performance**: High-speed inference and training. - **Versatility**: Suitable for detection, segmentation, classification, and pose estimation tasks. - **Pretrained Models**: Access to high-performing, pretrained models for various applications. @@ -204,7 +208,7 @@ To optimize and zip a dataset using Ultralytics tools, follow this example code: !!! Example "Optimize and Zip a Dataset" === "Python" - + ```python from pathlib import Path diff --git a/docs/en/datasets/obb/dota-v2.md b/docs/en/datasets/obb/dota-v2.md index 240cf3eb..7de8209a 100644 --- a/docs/en/datasets/obb/dota-v2.md +++ b/docs/en/datasets/obb/dota-v2.md @@ -195,9 +195,7 @@ For more details on how to split and preprocess the DOTA images, refer to the [s ### What are the differences between DOTA-v1.0, DOTA-v1.5, and DOTA-v2.0? - **DOTA-v1.0**: Includes 15 common categories across 2,806 images with 188,282 instances. The dataset is split into training, validation, and testing sets. - - **DOTA-v1.5**: Builds upon DOTA-v1.0 by annotating very small instances (less than 10 pixels) and adding a new category, "container crane," totaling 403,318 instances. - - **DOTA-v2.0**: Expands further with annotations from Google Earth and GF-2 Satellite, featuring 11,268 images and 1,793,658 instances. It includes new categories like "airport" and "helipad." For a detailed comparison and additional specifics, check the [dataset versions section](#dataset-versions). diff --git a/docs/en/datasets/obb/dota8.md b/docs/en/datasets/obb/dota8.md index 0bfa723a..5a8bb295 100644 --- a/docs/en/datasets/obb/dota8.md +++ b/docs/en/datasets/obb/dota8.md @@ -93,7 +93,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -105,7 +105,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/obb/index.md b/docs/en/datasets/obb/index.md index 10631703..d22d1121 100644 --- a/docs/en/datasets/obb/index.md +++ b/docs/en/datasets/obb/index.md @@ -109,7 +109,7 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO !!! 
Example === "Python" - + ```python from ultralytics import YOLO @@ -119,15 +119,14 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO # Train the model on the custom dataset results = model.train(data="your_dataset.yaml", epochs=100, imgsz=640) ``` - === "CLI" - + ```bash # Train a new YOLOv8n-OBB model on the custom dataset yolo obb train data=your_dataset.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640 ``` - + This ensures your model leverages the detailed OBB annotations for improved detection accuracy. ### What datasets are currently supported for OBB training in Ultralytics YOLO models? diff --git a/docs/en/datasets/pose/coco.md b/docs/en/datasets/pose/coco.md index 5adb09d3..02bdb3e1 100644 --- a/docs/en/datasets/pose/coco.md +++ b/docs/en/datasets/pose/coco.md @@ -13,7 +13,7 @@ The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialize ## COCO-Pose Pretrained Models | Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|------------------------------------------------------------------------------------------------------|-----------------------|-----------------------|--------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | diff --git a/docs/en/datasets/pose/coco8-pose.md b/docs/en/datasets/pose/coco8-pose.md index dfa8e301..eeb0c3dd 100644 --- a/docs/en/datasets/pose/coco8-pose.md +++ b/docs/en/datasets/pose/coco8-pose.md @@ -91,7 +91,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -103,7 +103,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i ``` === "CLI" - + ```bash yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md index a2e76234..700713d7 100644 --- a/docs/en/datasets/pose/index.md +++ b/docs/en/datasets/pose/index.md @@ -42,18 +42,18 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco8-pose # dataset root dir -train: images/train # train images (relative to 'path') 4 images -val: images/val # val images (relative to 'path') 4 images -test: # test images (optional) +path: ../datasets/coco8-pose # dataset root dir +train: images/train # train images (relative to 'path') 4 images +val: images/val # val images (relative to 'path') 4 images +test: # test images (optional) # Keypoints -kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] # Classes dictionary names: - 0: person + 0: person ``` The `train` and `val` fields specify the paths to the directories containing the training and validation images, respectively. @@ -142,7 +142,7 @@ This conversion tool can be used to convert the COCO dataset or any dataset in t ### What is the Ultralytics YOLO format for pose estimation? -The Ultralytics YOLO format for pose estimation datasets involves labeling each image with a corresponding text file. Each row of the text file stores information about an object instance: +The Ultralytics YOLO format for pose estimation datasets involves labeling each image with a corresponding text file. 
Each row of the text file stores information about an object instance: - Object class index - Object center coordinates (normalized x and y) @@ -154,6 +154,7 @@ For 2D poses, keypoints include pixel coordinates. For 3D, each keypoint also ha ### How do I use the COCO-Pose dataset with Ultralytics YOLO? To use the COCO-Pose dataset with Ultralytics YOLO: + 1. Download the dataset and prepare your label files in the YOLO format. 2. Create a YAML configuration file specifying paths to training and validation images, keypoint shape, and class names. 3. Use the configuration file for training: @@ -164,12 +165,13 @@ To use the COCO-Pose dataset with Ultralytics YOLO: model = YOLO("yolov8n-pose.pt") # load pretrained model results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640) ``` - + For more information, visit [COCO-Pose](coco.md) and [train](../../modes/train.md) sections. ### How can I add my own dataset for pose estimation in Ultralytics YOLO? To add your dataset: + 1. Convert your annotations to the Ultralytics YOLO format. 2. Create a YAML configuration file specifying the dataset paths, number of classes, and class names. 3. Use the configuration file to train your model: @@ -180,7 +182,7 @@ To add your dataset: model = YOLO("yolov8n-pose.pt") results = model.train(data="your-dataset.yaml", epochs=100, imgsz=640) ``` - + For complete steps, check the [Adding your own dataset](#adding-your-own-dataset) section. ### What is the purpose of the dataset YAML file in Ultralytics YOLO? @@ -192,7 +194,7 @@ path: ../datasets/coco8-pose train: images/train val: images/val names: - 0: person + 0: person ``` Read more about creating YAML configuration files in [Dataset YAML format](#dataset-yaml-format). diff --git a/docs/en/datasets/pose/tiger-pose.md b/docs/en/datasets/pose/tiger-pose.md index 3e8b5566..2462cf10 100644 --- a/docs/en/datasets/pose/tiger-pose.md +++ b/docs/en/datasets/pose/tiger-pose.md @@ -110,7 +110,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -120,10 +120,10 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i # Train the model results = model.train(data="tiger-pose.yaml", epochs=100, imgsz=640) ``` - + === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 @@ -140,7 +140,7 @@ To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you !!! Example "Inference Example" === "Python" - + ```python from ultralytics import YOLO @@ -150,10 +150,10 @@ To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you # Run inference results = model.predict(source="https://youtu.be/MIBAT6BGE6U", show=True) ``` - + === "CLI" - + ```bash # Run inference using a tiger-pose trained model yolo task=pose mode=predict source="https://youtu.be/MIBAT6BGE6U" show=True model="path/to/best.pt" diff --git a/docs/en/datasets/segment/carparts-seg.md b/docs/en/datasets/segment/carparts-seg.md index f0d020ff..621fe9f2 100644 --- a/docs/en/datasets/segment/carparts-seg.md +++ b/docs/en/datasets/segment/carparts-seg.md @@ -115,7 +115,7 @@ To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow the !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -138,6 +138,7 @@ For more details, refer to the [Training](../../modes/train.md) documentation. 
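Once training on the Carparts Segmentation dataset finishes, you will typically want to sanity-check the resulting weights. A minimal inference sketch is shown below; the checkpoint and image paths are placeholders, not part of the dataset page:

```python
from ultralytics import YOLO

# Load the weights produced by the training run above (path is an assumption)
model = YOLO("runs/segment/train/weights/best.pt")

# Run segmentation inference on a sample image and visualize the predicted masks
results = model.predict("path/to/car_image.jpg", imgsz=640)
results[0].show()
```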
### What are some applications of Carparts Segmentation? Carparts Segmentation can be widely applied in various fields such as: + - **Automotive quality control** - **Auto repair and maintenance** - **E-commerce cataloging** @@ -155,6 +156,6 @@ The dataset configuration file for the Carparts Segmentation dataset, `carparts- ### Why should I use the Carparts Segmentation Dataset? -The Carparts Segmentation Dataset provides rich, annotated data essential for developing high-accuracy segmentation models in automotive computer vision. This dataset's diversity and detailed annotations improve model training, making it ideal for applications like vehicle maintenance automation, enhancing vehicle safety systems, and supporting autonomous driving technologies. Partnering with a robust dataset accelerates AI development and ensures better model performance. +The Carparts Segmentation Dataset provides rich, annotated data essential for developing high-accuracy segmentation models in automotive computer vision. This dataset's diversity and detailed annotations improve model training, making it ideal for applications like vehicle maintenance automation, enhancing vehicle safety systems, and supporting autonomous driving technologies. Partnering with a robust dataset accelerates AI development and ensures better model performance. For more details, visit the [CarParts Segmentation Dataset Page](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm?ref=ultralytics). diff --git a/docs/en/datasets/segment/coco.md b/docs/en/datasets/segment/coco.md index bb88a232..ad372675 100644 --- a/docs/en/datasets/segment/coco.md +++ b/docs/en/datasets/segment/coco.md @@ -11,7 +11,7 @@ The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO ## COCO-Seg Pretrained Models | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|----------------------------------------------------------------------------------------------|-----------------------|----------------------|-----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | @@ -116,7 +116,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -128,7 +128,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 @@ -148,7 +148,7 @@ The COCO-Seg dataset includes several key features: The COCO-Seg dataset supports multiple pretrained YOLOv8 segmentation models with varying performance metrics. Here's a summary of the available models and their key metrics: | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -|----------------------------------------------------------------------------------------------|-----------------------|----------------------|-----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | diff --git a/docs/en/datasets/segment/coco8-seg.md b/docs/en/datasets/segment/coco8-seg.md index e4aa6bef..15128ed2 100644 --- a/docs/en/datasets/segment/coco8-seg.md +++ b/docs/en/datasets/segment/coco8-seg.md @@ -91,7 +91,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -103,7 +103,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 diff --git a/docs/en/datasets/segment/index.md b/docs/en/datasets/segment/index.md index 160bb3e1..09c92797 100644 --- a/docs/en/datasets/segment/index.md +++ b/docs/en/datasets/segment/index.md @@ -44,20 +44,20 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco8-seg # dataset root dir -train: images/train # train images (relative to 'path') 4 images -val: images/val # val images (relative to 'path') 4 images -test: # test images (optional) +path: ../datasets/coco8-seg # dataset root dir +train: images/train # train images (relative to 'path') 4 images +val: images/val # val images (relative to 'path') 4 images +test: # test images (optional) # Classes (80 COCO classes) names: - 0: person - 1: bicycle - 2: car - # ... - 77: teddy bear - 78: hair drier - 79: toothbrush + 0: person + 1: bicycle + 2: car + # ... + 77: teddy bear + 78: hair drier + 79: toothbrush ``` The `train` and `val` fields specify the paths to the directories containing the training and validation images, respectively. @@ -141,7 +141,7 @@ To auto-annotate your dataset using the Ultralytics framework, you can use the ` ``` | Argument | Type | Description | Default | -|--------------|-------------------------|-------------------------------------------------------------------------------------------------------------|----------------| +| ------------ | ----------------------- | ----------------------------------------------------------------------------------------------------------- | -------------- | | `data` | `str` | Path to a folder containing images to be annotated. | `None` | | `det_model` | `str, optional` | Pre-trained YOLO detection model. Defaults to `'yolov8x.pt'`. 
| `'yolov8x.pt'` | | `sam_model` | `str, optional` | Pre-trained SAM segmentation model. Defaults to `'sam_b.pt'`. | `'sam_b.pt'` | @@ -175,15 +175,15 @@ This script converts your COCO dataset annotations to the required YOLO format, To prepare a YAML file for training YOLO models with Ultralytics, you need to define the dataset paths and class names. Here's an example YAML configuration: ```yaml -path: ../datasets/coco8-seg # dataset root dir -train: images/train # train images (relative to 'path') -val: images/val # val images (relative to 'path') +path: ../datasets/coco8-seg # dataset root dir +train: images/train # train images (relative to 'path') +val: images/val # val images (relative to 'path') names: - 0: person - 1: bicycle - 2: car - # ... + 0: person + 1: bicycle + 2: car + # ... ``` Ensure you update the paths and class names according to your dataset. For more information, check the [Dataset YAML Format](#dataset-yaml-format) section. diff --git a/docs/en/datasets/segment/package-seg.md b/docs/en/datasets/segment/package-seg.md index 2aec99a2..e228c4c5 100644 --- a/docs/en/datasets/segment/package-seg.md +++ b/docs/en/datasets/segment/package-seg.md @@ -104,7 +104,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us !!! Example "Train Example" === "Python" - + ```python from ultralytics import YOLO @@ -116,7 +116,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us ``` === "CLI" - + ```bash # Start training from a pretrained *.pt model yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 @@ -127,6 +127,7 @@ Refer to the model [Training](../../modes/train.md) page for more details. ### What are the components of the Package Segmentation Dataset, and how is it structured? The dataset is structured into three main components: + - **Training set**: Contains 1920 images with annotations. - **Testing set**: Comprises 89 images with corresponding annotations. - **Validation set**: Includes 188 images with annotations. @@ -139,6 +140,6 @@ Ultralytics YOLOv8 provides state-of-the-art accuracy and speed for real-time ob ### How can I access and use the package-seg.yaml file for the Package Segmentation Dataset? -The `package-seg.yaml` file is hosted on Ultralytics' GitHub repository and contains essential information about the dataset's paths, classes, and configuration. You can download it from [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/package-seg.yaml). This file is crucial for configuring your models to utilize the dataset efficiently. +The `package-seg.yaml` file is hosted on Ultralytics' GitHub repository and contains essential information about the dataset's paths, classes, and configuration. You can download it from [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/package-seg.yaml). This file is crucial for configuring your models to utilize the dataset efficiently. For more insights and practical examples, explore our [Usage](https://docs.ultralytics.com/usage/python/) section. diff --git a/docs/en/datasets/track/index.md b/docs/en/datasets/track/index.md index 507a2a02..2e7735d1 100644 --- a/docs/en/datasets/track/index.md +++ b/docs/en/datasets/track/index.md @@ -38,7 +38,7 @@ To use Multi-Object Tracking with Ultralytics YOLO, you can start by using the P !!! 
Example === "Python" - + ```python from ultralytics import YOLO @@ -51,7 +51,7 @@ To use Multi-Object Tracking with Ultralytics YOLO, you can start by using the P ```bash yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show ``` - + These commands load the YOLOv8 model and use it for tracking objects in the given video source with specific confidence (`conf`) and Intersection over Union (`iou`) thresholds. For more details, refer to the [track mode documentation](../../modes/track.md). ### What are the upcoming features for training trackers in Ultralytics? diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 1ba09e0b..03dbf0ad 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -385,7 +385,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes): continue # Skip background class_index = pixel_to_class_mapping.get(value, -1) if class_index == -1: - LOGGER.warning(f"Unknown class for pixel value {value} in file {mask_filename}, skipping.") + LOGGER.warning(f"Unknown class for pixel value {value} in file {mask_path}, skipping.") continue # Create a binary mask for the current class and find contours @@ -403,7 +403,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes): yolo_format.append(round(point[1] / img_height, 6)) yolo_format_data.append(yolo_format) # Save Ultralytics YOLO format data to file - output_path = Path(output_dir) / f"{Path(mask_filename).stem}.txt" + output_path = Path(output_dir) / f"{mask_path.stem}.txt" with open(output_path, "w") as file: for item in yolo_format_data: line = " ".join(map(str, item))
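For reference, the patched `convert_segment_masks_to_yolo_seg` keeps its existing signature, so callers are unaffected by this path/logging fix. A minimal usage sketch (directory paths are placeholders) is:

```python
from ultralytics.data.converter import convert_segment_masks_to_yolo_seg

# Convert a folder of class-index mask images into YOLO segmentation *.txt labels.
# `classes` is the total number of classes encoded in the mask pixel values (e.g. 80 for COCO).
convert_segment_masks_to_yolo_seg(
    masks_dir="path/to/masks",  # directory of mask images whose pixel values are class IDs (placeholder path)
    output_dir="path/to/labels",  # where the YOLO-format .txt label files will be written (placeholder path)
    classes=80,
)
```

With this patch, both the unknown-pixel-value warning and the generated label filename are derived from `mask_path`, so log messages and outputs point at the actual mask file being processed.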