ultralytics 8.2.14 add task + OBB to hub.check_dataset() (#12573)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Author: Burhan, 2024-05-12 15:27:44 -04:00, committed by GitHub
parent cf24349efb
commit fd748e3c7a
5 changed files with 36 additions and 24 deletions

View file

@@ -12,7 +12,7 @@ import yaml
 from PIL import Image
 from ultralytics import RTDETR, YOLO
-from ultralytics.cfg import MODELS, TASK2DATA
+from ultralytics.cfg import MODELS, TASKS, TASK2DATA
 from ultralytics.data.build import load_inference_source
 from ultralytics.utils import (
     ASSETS,
@@ -98,6 +98,12 @@ def test_predict_img(model_name):
     assert len(model(batch, imgsz=32)) == len(batch)  # multiple sources in a batch
 
 
+@pytest.mark.parametrize("model", MODELS)
+def test_predict_visualize(model):
+    """Test model predict methods with 'visualize=True' arguments."""
+    YOLO(WEIGHTS_DIR / model)(SOURCE, imgsz=32, visualize=True)
+
+
 def test_predict_grey_and_4ch():
     """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images."""
     im = Image.open(SOURCE)
@@ -267,7 +273,7 @@ def test_data_utils():
     # from ultralytics.utils.files import WorkingDirectory
     # with WorkingDirectory(ROOT.parent / 'tests'):
 
-    for task in "detect", "segment", "pose", "classify":
+    for task in TASKS:
         file = Path(TASK2DATA[task]).with_suffix(".zip")  # i.e. coco8.zip
         download(f"https://github.com/ultralytics/hub/raw/main/example_datasets/{file}", unzip=False, dir=TMP)
         stats = HUBDatasetStats(TMP / file, task=task)
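The test now iterates over every entry in `TASKS` instead of a hard-coded tuple, so newly supported tasks such as `obb` are exercised automatically. A minimal sketch of the mapping the loop relies on; the literal values of `TASKS` and `TASK2DATA` below are assumptions for illustration (the authoritative definitions live in `ultralytics.cfg`), though they match the dataset names used elsewhere in this commit:

```python
from pathlib import Path

# Assumed contents for illustration only; see ultralytics.cfg for the real TASKS and TASK2DATA.
TASKS = ("detect", "segment", "classify", "pose", "obb")
TASK2DATA = {
    "detect": "coco8",
    "segment": "coco8-seg",
    "classify": "imagenet10",
    "pose": "coco8-pose",
    "obb": "dota8",
}

for task in TASKS:
    file = Path(TASK2DATA[task]).with_suffix(".zip")  # e.g. dota8.zip for the new OBB task
    print(f"{task}: https://github.com/ultralytics/hub/raw/main/example_datasets/{file}")
```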

View file

@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.2.13"
+__version__ = "8.2.14"
 
 from ultralytics.data.explorer.explorer import Explorer
 from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld

View file

@@ -441,6 +441,7 @@ class HUBDatasetStats:
         stats = HUBDatasetStats('path/to/coco8.zip', task='detect')  # detect dataset
         stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment')  # segment dataset
         stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose')  # pose dataset
+        stats = HUBDatasetStats('path/to/dota8.zip', task='obb')  # OBB dataset
         stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify')  # classification dataset
 
         stats.get_json(save=True)
@@ -497,13 +498,13 @@ class HUBDatasetStats:
             """Update labels to integer class and 4 decimal place floats."""
             if self.task == "detect":
                 coordinates = labels["bboxes"]
-            elif self.task == "segment":
+            elif self.task in {"segment", "obb"}:  # Segment and OBB use segments. OBB segments are normalized xyxyxyxy
                 coordinates = [x.flatten() for x in labels["segments"]]
             elif self.task == "pose":
                 n, nk, nd = labels["keypoints"].shape
                 coordinates = np.concatenate((labels["bboxes"], labels["keypoints"].reshape(n, nk * nd)), 1)
             else:
-                raise ValueError("Undefined dataset task.")
+                raise ValueError(f"Undefined dataset task={self.task}.")
             zipped = zip(labels["cls"], coordinates)
             return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped]
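For reference, a small self-contained sketch of what the `segment`/`obb` branch produces for a single oriented box: the four normalized xyxyxyxy corners are flattened and rounded to 4 decimals behind the integer class index. The label dict below is illustrative, not taken from a real dataset, but follows the layout the label-rounding helper above expects:

```python
import numpy as np

# Illustrative label dict: one instance of class 3 whose OBB is stored as a
# 4-point polygon of normalized xyxyxyxy corners.
labels = {
    "cls": np.array([[3]]),
    "segments": [np.array([[0.10, 0.20], [0.40, 0.20], [0.40, 0.55], [0.10, 0.55]])],
}

coordinates = [x.flatten() for x in labels["segments"]]  # same branch as task in {"segment", "obb"}
rows = [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zip(labels["cls"], coordinates)]
print(rows)  # [[3, 0.1, 0.2, 0.4, 0.2, 0.4, 0.55, 0.1, 0.55]]
```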

View file

@@ -106,22 +106,26 @@ def get_export(model_id="", format="torchscript"):
     return r.json()
 
 
-def check_dataset(path="", task="detect"):
+def check_dataset(path: str, task: str) -> None:
     """
     Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded
     to the HUB. Usage examples are given below.
 
     Args:
-        path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
-        task (str, optional): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Defaults to 'detect'.
+        path (str): Path to data.zip (with data.yaml inside data.zip).
+        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify', 'obb'.
 
     Example:
+        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
+        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
         ```python
         from ultralytics.hub import check_dataset
 
         check_dataset('path/to/coco8.zip', task='detect')  # detect dataset
         check_dataset('path/to/coco8-seg.zip', task='segment')  # segment dataset
         check_dataset('path/to/coco8-pose.zip', task='pose')  # pose dataset
+        check_dataset('path/to/dota8.zip', task='obb')  # OBB dataset
+        check_dataset('path/to/imagenet10.zip', task='classify')  # classification dataset
         ```
     """
     HUBDatasetStats(path=path, task=task).get_json()
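An end-to-end pre-upload check for the new OBB task, mirroring the docstring example and the example_datasets URL pattern used in the tests above. The download directory is arbitrary, and `download` is assumed to come from `ultralytics.utils.downloads`, as in the test file:

```python
from ultralytics.hub import check_dataset
from ultralytics.utils.downloads import download  # same helper the tests use

# Fetch the example OBB dataset zip (URL pattern from the test above), then validate it
# for HUB upload; check_dataset() wraps HUBDatasetStats(...).get_json() as shown.
download("https://github.com/ultralytics/hub/raw/main/example_datasets/dota8.zip", unzip=False, dir=".")
check_dataset("dota8.zip", task="obb")
```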

View file

@@ -1105,23 +1105,24 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")):
         n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
         save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
     """
-    for m in ["Detect", "Pose", "Segment"]:
+    for m in {"Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"}:  # all model heads
         if m in module_type:
             return
 
-    _, channels, height, width = x.shape  # batch, channels, height, width
-    if height > 1 and width > 1:
-        f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
-        blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
-        n = min(n, channels)  # number of plots
-        _, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
-        ax = ax.ravel()
-        plt.subplots_adjust(wspace=0.05, hspace=0.05)
-        for i in range(n):
-            ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
-            ax[i].axis("off")
-        LOGGER.info(f"Saving {f}... ({n}/{channels})")
-        plt.savefig(f, dpi=300, bbox_inches="tight")
-        plt.close()
-        np.save(str(f.with_suffix(".npy")), x[0].cpu().numpy())  # npy save
+    if isinstance(x, torch.Tensor):
+        _, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            _, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis("off")
+            LOGGER.info(f"Saving {f}... ({n}/{channels})")
+            plt.savefig(f, dpi=300, bbox_inches="tight")
+            plt.close()
+            np.save(str(f.with_suffix(".npy")), x[0].cpu().numpy())  # npy save
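The new `isinstance(x, torch.Tensor)` guard matters because detection heads and decoder modules can return tuples or lists rather than a single feature-map tensor; those (and anything in the expanded head-name set) are now skipped instead of failing on `.shape`. A hedged usage sketch, assuming the function lives in `ultralytics.utils.plotting`; the `module_type` strings below are illustrative labels, not taken from a specific model:

```python
from pathlib import Path

import torch

from ultralytics.utils.plotting import feature_visualization  # module path assumed

save_dir = Path("runs/detect/exp")
save_dir.mkdir(parents=True, exist_ok=True)  # savefig needs an existing directory

x = torch.randn(1, 64, 80, 80)        # backbone-style feature map: plotted and saved as PNG + .npy
y = (torch.randn(1, 84, 8400), None)  # head-style tuple output

feature_visualization(x, "model.4.C2f", stage=4, n=16, save_dir=save_dir)        # tensor: plotted
feature_visualization(y, "model.22.Detect", stage=22, n=16, save_dir=save_dir)   # head name: early return
feature_visualization(y, "model.9.SomeBlock", stage=9, n=16, save_dir=save_dir)  # non-tensor: isinstance guard skips it
```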