Docs updates: Add Explorer to tab, YOLOv5 in Guides and Usage in Quickstart (#7438)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Haixuan Xavier Tao <tao.xavier@outlook.com>
Ayush Chaurasia 2024-01-10 04:20:26 +05:30 committed by GitHub
parent 53150a925b
commit a92adf8231
30 changed files with 227 additions and 105 deletions

View file

@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
from .utils import plot_query_result
__all__ = ['plot_query_result']

View file

@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple, Union
@@ -24,9 +26,8 @@ class ExplorerDataset(YOLODataset):
def __init__(self, *args, data: dict = None, **kwargs) -> None:
super().__init__(*args, data=data, **kwargs)
# NOTE: Load the image directly without any resize operations.
def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
"""Loads 1 image from dataset index 'i', returns (im, resized hw)."""
"""Loads 1 image from dataset index 'i' without any resize ops."""
im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
if im is None: # not cached in RAM
if fn.exists(): # load npy
@@ -41,6 +42,7 @@ class ExplorerDataset(YOLODataset):
return self.ims[i], self.im_hw0[i], self.im_hw[i]
def build_transforms(self, hyp: IterableSimpleNamespace = None):
+"""Creates transforms for dataset images without resizing."""
return Format(
bbox_format='xyxy',
normalize=False,
@@ -122,7 +124,7 @@ class Explorer:
self.table = table
def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
-# Implement Batching
+"""Generates batches of data for embedding, excluding specified keys."""
for i in tqdm(range(len(dataset))):
self.progress = float(i + 1) / len(dataset)
batch = dataset[i]
@@ -143,7 +145,7 @@ class Explorer:
limit (int): Number of results to return.
Returns:
-An arrow table containing the results. Supports converting to:
+(pyarrow.Table): An arrow table containing the results. Supports converting to:
- pandas dataframe: `result.to_pandas()`
- dict of lists: `result.to_pydict()`
@@ -175,7 +177,7 @@ class Explorer:
return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
Returns:
-An arrow table containing the results.
+(pyarrow.Table): An arrow table containing the results.
Example:
```python
@@ -216,7 +218,7 @@ class Explorer:
labels (bool): Whether to plot the labels or not.
Returns:
-PIL Image containing the plot.
+(PIL.Image): Image containing the plot.
Example:
```python
@@ -248,7 +250,7 @@ class Explorer:
return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
Returns:
-A table or pandas dataframe containing the results.
+(pandas.DataFrame): A dataframe containing the results.
Example:
```python
@@ -282,7 +284,7 @@ class Explorer:
limit (int): Number of results to return. Defaults to 25.
Returns:
-PIL Image containing the plot.
+(PIL.Image): Image containing the plot.
Example:
```python
@@ -306,11 +308,12 @@ class Explorer:
Args:
max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when running
vector search. Defaults: None.
force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
Returns:
-A pandas dataframe containing the similarity index.
+(pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
+include indices of similar images and their respective distances.
Example:
```python
@@ -340,6 +343,7 @@ class Explorer:
sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode='overwrite')
def _yield_sim_idx():
+"""Generates a dataframe with similarity indices and distances for images."""
for i in tqdm(range(len(embeddings))):
sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f'_distance <= {max_dist}')
yield [{
@@ -364,7 +368,7 @@ class Explorer:
force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
Returns:
-PIL.PngImagePlugin.PngImageFile containing the plot.
+(PIL.Image): Image containing the plot.
Example:
```python
@@ -416,7 +420,7 @@ class Explorer:
query (str): Question to ask.
Returns:
-Answer from AI.
+(pandas.DataFrame): A dataframe containing filtered results to the SQL query.
Example:
```python
@@ -436,14 +440,17 @@ class Explorer:
def visualize(self, result):
"""
-Visualize the results of a query.
+Visualize the results of a query. TODO.
Args:
-result (arrow table): Arrow table containing the results of a query.
+result (pyarrow.Table): Table containing the results of a query.
"""
-# TODO:
pass
def generate_report(self, result):
-"""Generate a report of the dataset."""
+"""
+Generate a report of the dataset.
+TODO
+"""
pass
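The return types documented in the hunks above are easiest to see in use. Below is a minimal usage sketch of the Explorer API based on the signatures in this diff; the dataset and model names ('coco128.yaml', 'yolov8n.pt') are illustrative placeholders, and the optional lancedb dependencies must be installed.

```python
from ultralytics import Explorer

# Build an embeddings table first; every query below depends on it.
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')  # illustrative names
exp.create_embeddings_table()

# sql_query with return_type='arrow' yields a pyarrow.Table, convertible
# per the docstring via result.to_pandas() or result.to_pydict().
table = exp.sql_query("WHERE labels LIKE '%person%'", return_type='arrow')
df = table.to_pandas()

# get_similar defaults to a pandas.DataFrame of the closest matches.
similar = exp.get_similar(idx=0, limit=10)

# similarity_index returns one row per image, with indices and distances
# of its near neighbours, as described in the updated docstring.
sim_idx = exp.similarity_index(max_dist=0.2)
```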

View file

@@ -0,0 +1 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license

View file

@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
import time
from threading import Thread
@@ -7,13 +9,13 @@ from ultralytics import Explorer
from ultralytics.utils import ROOT, SETTINGS
from ultralytics.utils.checks import check_requirements
-check_requirements('streamlit>=1.29.0')
-check_requirements('streamlit-select>=0.2')
+check_requirements(('streamlit>=1.29.0', 'streamlit-select>=0.2'))
import streamlit as st
from streamlit_select import image_select
def _get_explorer():
"""Initializes and returns an instance of the Explorer class."""
exp = Explorer(data=st.session_state.get('dataset'), model=st.session_state.get('model'))
thread = Thread(target=exp.create_embeddings_table,
kwargs={'force': st.session_state.get('force_recreate_embeddings')})
@@ -28,6 +30,7 @@ def _get_explorer():
def init_explorer_form():
+"""Sets up a Streamlit form to select a dataset and model and build the embeddings table."""
datasets = ROOT / 'cfg' / 'datasets'
ds = [d.name for d in datasets.glob('*.yaml')]
models = [
@@ -46,6 +49,7 @@ def init_explorer_form():
def query_form():
+"""Sets up a Streamlit form to enter and submit an SQL query."""
with st.form('query_form'):
col1, col2 = st.columns([0.8, 0.2])
with col1:
@@ -58,6 +62,7 @@ def query_form():
def ai_query_form():
+"""Sets up a Streamlit form to enter and submit a natural-language AI query."""
with st.form('ai_query_form'):
col1, col2 = st.columns([0.8, 0.2])
with col1:
@@ -67,6 +72,7 @@ def ai_query_form():
def find_similar_imgs(imgs):
+"""Searches for images similar to the selected ones and stores the results in session state."""
exp = st.session_state['explorer']
similar = exp.get_similar(img=imgs, limit=st.session_state.get('limit'), return_type='arrow')
paths = similar.to_pydict()['im_file']
@@ -74,6 +80,7 @@ def find_similar_imgs(imgs):
def similarity_form(selected_imgs):
+"""Sets up a Streamlit form for running a similarity search on the selected images."""
st.write('Similarity Search')
with st.form('similarity_form'):
subcol1, subcol2 = st.columns([1, 1])
@@ -109,6 +116,7 @@ def similarity_form(selected_imgs):
def run_sql_query():
+"""Executes an SQL query and returns the results."""
st.session_state['error'] = None
query = st.session_state.get('query')
if query.rstrip().lstrip():
@@ -118,6 +126,7 @@ def run_sql_query():
def run_ai_query():
+"""Executes an AI (natural-language) query and updates session state with the results."""
if not SETTINGS['openai_api_key']:
st.session_state[
'error'] = 'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
@@ -134,12 +143,14 @@ def run_ai_query():
def reset_explorer():
+"""Resets the explorer to its initial state by clearing session variables."""
st.session_state['explorer'] = None
st.session_state['imgs'] = None
st.session_state['error'] = None
def utralytics_explorer_docs_callback():
+"""Displays a container with the Ultralytics Explorer API documentation link."""
with st.container(border=True):
st.image('https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg',
width=100)
@@ -151,6 +162,7 @@ def utralytics_explorer_docs_callback():
def layout():
+"""Defines the page layout and renders the Explorer dashboard."""
st.set_page_config(layout='wide', initial_sidebar_state='collapsed')
st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)

View file

@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
import getpass
from typing import List
@@ -14,6 +16,7 @@ from ultralytics.utils.plotting import plot_images
def get_table_schema(vector_size):
+"""Returns a LanceModel schema for an embeddings table with the specified vector size."""
from lancedb.pydantic import LanceModel, Vector
class Schema(LanceModel):
@@ -29,6 +32,7 @@ def get_table_schema(vector_size):
def get_sim_index_schema():
+"""Returns a LanceModel schema for the similarity index table."""
from lancedb.pydantic import LanceModel
class Schema(LanceModel):
@@ -41,6 +45,7 @@ def get_sim_index_schema():
def sanitize_batch(batch, dataset_info):
+"""Sanitizes input batch for inference, ensuring correct format and dimensions."""
batch['cls'] = batch['cls'].flatten().int().tolist()
box_cls_pair = sorted(zip(batch['bboxes'].tolist(), batch['cls']), key=lambda x: x[1])
batch['bboxes'] = [box for box, _ in box_cls_pair]
@@ -111,6 +116,7 @@ def plot_query_result(similar_set, plot_labels=True):
def prompt_sql_query(query):
+"""Prompts an OpenAI model to convert a natural-language question into an SQL query."""
check_requirements('openai>=1.6.1')
from openai import OpenAI
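Both schema helpers above follow the same LanceDB pattern: declare a pydantic-style LanceModel whose annotated fields become table columns. A minimal sketch of that pattern; the field names and vector size here are illustrative, not the exact schema used:

```python
from lancedb.pydantic import LanceModel, Vector

class ExampleSchema(LanceModel):
    im_file: str          # one column per annotated field
    vector: Vector(256)   # fixed-size embedding column; 256 is illustrative

# A table created with schema=ExampleSchema enforces these columns, as in
# the connection.create_table(name, schema=..., mode='overwrite') call above.
```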

View file

@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
import itertools
import os
from glob import glob
@@ -53,10 +55,13 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
def load_yolo_dota(data_root, split='train'):
-"""Load DOTA dataset.
+"""
+Load DOTA dataset.
Args:
data_root (str): Data root.
split (str): The split data set, could be train or val.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
@@ -133,7 +138,7 @@ def get_window_obj(anno, windows, iof_thr=0.7):
label[:, 1::2] *= w
label[:, 2::2] *= h
iofs = bbox_iof(label[:, 1:], windows)
-# unnormalized and misaligned coordinates
+# Unnormalized and misaligned coordinates
window_anns = [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]
else:
window_anns = [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]
@@ -141,13 +146,16 @@ def get_window_obj(anno, windows, iof_thr=0.7):
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
-"""Crop images and save new labels.
+"""
+Crop images and save new labels.
Args:
anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
windows (list): A list of windows coordinates.
window_objs (list): A list of labels inside each window.
im_dir (str): The output directory path of images.
lb_dir (str): The output directory path of labels.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
@@ -185,7 +193,7 @@ def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024
"""
Split both images and labels.
-NOTES:
+Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
@@ -215,7 +223,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
"""
Split train and val set of DOTA.
-NOTES:
+Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
@@ -245,7 +253,7 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
"""
Split test set of DOTA, labels are not included within this set.
-NOTES:
+Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
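The split helpers in this file are typically driven end to end as below; a sketch assuming a DOTA-format dataset under data_root, with illustrative paths:

```python
from ultralytics.data.split_dota import split_test, split_trainval

# Crop train/val images and their labels into 1024x1024 windows with a
# 200 px gap (overlap) between adjacent windows, matching the defaults above.
split_trainval(data_root='DOTAv1.0', save_dir='DOTAv1.0-split', crop_size=1024, gap=200)

# The test split carries no labels, so only the images are cropped.
split_test(data_root='DOTAv1.0', save_dir='DOTAv1.0-split', crop_size=1024, gap=200)
```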

View file

@@ -107,6 +107,7 @@ class RTDETRValidator(DetectionValidator):
return outputs
def _prepare_batch(self, si, batch):
+"""Prepares a batch for training or inference by applying transformations."""
idx = batch['batch_idx'] == si
cls = batch['cls'][idx].squeeze(-1)
bbox = batch['bboxes'][idx]
@@ -121,6 +122,7 @@ class RTDETRValidator(DetectionValidator):
return prepared_batch
def _prepare_pred(self, pred, pbatch):
+"""Prepares and returns a batch with transformed bounding boxes and class labels."""
predn = pred.clone()
predn[..., [0, 2]] *= pbatch['ori_shape'][1] / self.args.imgsz # native-space pred
predn[..., [1, 3]] *= pbatch['ori_shape'][0] / self.args.imgsz # native-space pred

View file

@@ -87,6 +87,7 @@ class DetectionValidator(BaseValidator):
max_det=self.args.max_det)
def _prepare_batch(self, si, batch):
+"""Prepares a batch of images and annotations for validation."""
idx = batch['batch_idx'] == si
cls = batch['cls'][idx].squeeze(-1)
bbox = batch['bboxes'][idx]
@@ -100,6 +101,7 @@ class DetectionValidator(BaseValidator):
return prepared_batch
def _prepare_pred(self, pred, pbatch):
+"""Prepares predictions by scaling bounding boxes to native image space."""
predn = pred.clone()
ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'],
ratio_pad=pbatch['ratio_pad']) # native-space pred

View file

@@ -23,6 +23,7 @@ class OBBPredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+"""Initializes OBBPredictor with optional model and data configuration overrides."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'obb'

View file

@@ -65,6 +65,7 @@ class OBBValidator(DetectionValidator):
return self.match_predictions(detections[:, 5], gt_cls, iou)
def _prepare_batch(self, si, batch):
+"""Prepares and returns a batch for OBB validation."""
idx = batch['batch_idx'] == si
cls = batch['cls'][idx].squeeze(-1)
bbox = batch['bboxes'][idx]
@@ -78,6 +79,7 @@ class OBBValidator(DetectionValidator):
return prepared_batch
def _prepare_pred(self, pred, pbatch):
+"""Prepares and returns a batch for OBB validation with scaled and padded bounding boxes."""
predn = pred.clone()
ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'],
xywh=True) # native-space pred

View file

@@ -69,6 +69,7 @@ class PoseValidator(DetectionValidator):
self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[])
def _prepare_batch(self, si, batch):
+"""Prepares a batch for processing by converting keypoints to float and moving to device."""
pbatch = super()._prepare_batch(si, batch)
kpts = batch['keypoints'][batch['batch_idx'] == si]
h, w = pbatch['imgsz']
@@ -80,6 +81,7 @@ class PoseValidator(DetectionValidator):
return pbatch
def _prepare_pred(self, pred, pbatch):
+"""Prepares and scales keypoints in a batch for pose processing."""
predn = super()._prepare_pred(pred, pbatch)
nk = pbatch['kpts'].shape[1]
pred_kpts = predn[:, 6:].view(len(predn), nk, -1)

View file

@@ -72,12 +72,14 @@ class SegmentationValidator(DetectionValidator):
return p, proto
def _prepare_batch(self, si, batch):
+"""Prepares a batch for training or inference by processing images and targets."""
prepared_batch = super()._prepare_batch(si, batch)
midx = [si] if self.args.overlap_mask else batch['batch_idx'] == si
prepared_batch['masks'] = batch['masks'][midx]
return prepared_batch
def _prepare_pred(self, pred, pbatch, proto):
+"""Prepares predictions and processes instance masks for segmentation evaluation."""
predn = super()._prepare_pred(pred, pbatch)
pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=pbatch['imgsz'])
return predn, pred_masks

View file

@@ -116,6 +116,7 @@ class OBB(Detect):
"""YOLOv8 OBB detection head for detection with rotation models."""
def __init__(self, nc=80, ne=1, ch=()):
+"""Initialize OBB with number of classes `nc` and layer channels `ch`."""
super().__init__(nc, ch)
self.ne = ne # number of extra parameters
self.detect = Detect.forward
@@ -124,6 +125,7 @@ class OBB(Detect):
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
def forward(self, x):
+"""Concatenates and returns predicted bounding boxes and class probabilities."""
bs = x[0].shape[0] # batch size
angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2) # OBB theta logits
# NOTE: set `angle` as an attribute so that `decode_bboxes` could use it.

View file

@@ -306,6 +306,7 @@ class OBBModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
+"""Initialize the loss criterion for the model."""
return v8OBBLoss(self)
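The OBB head and OBBModel changes above wire oriented-box support into the usual YOLO interface. A brief usage sketch, assuming an OBB-task checkpoint is available; the weight and image names are illustrative:

```python
from ultralytics import YOLO

model = YOLO('yolov8n-obb.pt')  # OBB-task weights; name is illustrative
results = model('boats.jpg')    # run inference on an example image
for r in results:
    print(r.obb)                # oriented boxes, including the rotation angle
```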

View file

@@ -153,6 +153,7 @@ class ObjectCounter:
self.selected_point = None
def extract_and_process_tracks(self, tracks):
+"""Extracts and processes tracks for object counting in a video stream."""
boxes = tracks[0].boxes.xyxy.cpu()
clss = tracks[0].boxes.cls.cpu().tolist()
track_ids = tracks[0].boxes.id.int().cpu().tolist()

View file

@@ -55,6 +55,7 @@ class BaseTrack:
_count = 0
def __init__(self):
+"""Initializes a new track with unique ID and foundational tracking attributes."""
self.track_id = 0
self.is_activated = False
self.state = TrackState.New

View file

@@ -245,6 +245,7 @@ def set_logging(name=LOGGING_NAME, verbose=True):
class CustomFormatter(logging.Formatter):
def format(self, record):
+"""Formats the log record, applying emoji-safe conversion to the message."""
return emojis(super().format(record))
formatter = CustomFormatter('%(message)s') # Use CustomFormatter to eliminate UTF-8 output as last recourse

View file

@@ -206,7 +206,7 @@ def check_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=1.5, h
# Check file size
gib = 1 << 30 # bytes per GiB
data = int(r.headers.get('Content-Length', 0)) / gib # file size (GB)
-total, used, free = (x / gib for x in shutil.disk_usage('/')) # bytes
+total, used, free = (x / gib for x in shutil.disk_usage(Path.cwd())) # bytes
if data * sf < free:
return True # sufficient space
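The switch from '/' to Path.cwd() matters because the root filesystem is not necessarily the one receiving the download, for example on Windows drives or in containers with mounted volumes. A small standalone sketch of the measurement itself, using only the standard library:

```python
import shutil
from pathlib import Path

gib = 1 << 30  # bytes per GiB
# Measure the filesystem holding the working directory, where downloads
# actually land, rather than '/', which can be a different device.
total, used, free = (x / gib for x in shutil.disk_usage(Path.cwd()))
print(f'total {total:.1f} GiB, used {used:.1f} GiB, free {free:.1f} GiB')
```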