From 5323ee0d583e203e2f09ed65f9addaca0ee6b355 Mon Sep 17 00:00:00 2001
From: Muhammad Rizwan Munawar
Date: Fri, 26 Apr 2024 20:04:25 +0500
Subject: [PATCH] Add benchmarking for RF100 datasets (#10190)

Co-authored-by: UltralyticsAssistant
Co-authored-by: Glenn Jocher
---
 docs/en/datasets/detect/roboflow-100.md |  48 +++++++++
 ultralytics/utils/benchmarks.py         | 132 ++++++++++++++++++++++++
 2 files changed, 180 insertions(+)

diff --git a/docs/en/datasets/detect/roboflow-100.md b/docs/en/datasets/detect/roboflow-100.md
index af130e35..fe54138d 100644
--- a/docs/en/datasets/detect/roboflow-100.md
+++ b/docs/en/datasets/detect/roboflow-100.md
@@ -33,6 +33,54 @@ The Roboflow 100 dataset is organized into seven categories, each with a distinc
 
 This structure enables a diverse and extensive testing ground for object detection models, reflecting real-world application scenarios.
 
+## Benchmarking
+
+Dataset benchmarking evaluates machine learning model performance on specific datasets using standardized metrics such as accuracy, mean Average Precision (mAP), and F1-score.
+
+!!! Tip "Benchmarking"
+
+    Benchmarking results are saved to "ultralytics-benchmarks/evaluation.txt".
+
+!!! Example "Benchmarking example"
+
+    === "Python"
+
+        ```python
+        import os
+        import shutil
+        from pathlib import Path
+
+        from ultralytics.utils.benchmarks import RF100Benchmark
+
+        # Initialize RF100Benchmark and set API key
+        benchmark = RF100Benchmark()
+        benchmark.set_key(api_key="YOUR_ROBOFLOW_API_KEY")
+
+        # Parse dataset and define file paths
+        names, cfg_yamls = benchmark.parse_dataset()
+        val_log_file = Path("ultralytics-benchmarks") / "validation.txt"
+        eval_log_file = Path("ultralytics-benchmarks") / "evaluation.txt"
+
+        # Run benchmarks on each dataset in RF100
+        for ind, path in enumerate(cfg_yamls):
+            path = Path(path)
+            if path.exists():
+                # Fix YAML file and run training
+                benchmark.fix_yaml(str(path))
+                os.system(f"yolo detect train data={path} model=yolov8s.pt epochs=1 batch=16")
+
+                # Run validation and evaluate
+                os.system(f"yolo detect val data={path} model=runs/detect/train/weights/best.pt > {val_log_file} 2>&1")
+                benchmark.evaluate(str(path), str(val_log_file), str(eval_log_file), ind)
+
+                # Remove the 'runs' directory
+                runs_dir = Path.cwd() / "runs"
+                shutil.rmtree(runs_dir)
+            else:
+                print("YAML file path does not exist")
+
+        print("RF100 Benchmarking completed!")
+        ```
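+
+!!! Note "Dataset links format"
+
+    The `parse_dataset` call above downloads `datasets_links.txt` and, judging by its parsing code, expects each line to be a Roboflow Universe URL of the form `https://universe.roboflow.com/<workspace>/<project>/<version>`. A minimal sketch of that parsing step, using a hypothetical workspace, project, and version:
+
+    ```python
+    import re
+
+    line = "https://universe.roboflow.com/my-workspace/my-project/2"  # hypothetical entry
+
+    # Runs of "/" act as a single separator, yielding scheme, host, workspace, project, and version
+    _, url, workspace, project, version = re.split("/+", line.strip())
+    print(workspace, project, version)  # my-workspace my-project 2
+    ```
+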
 ## Applications

 Roboflow 100 is invaluable for various applications related to computer vision and deep learning. Researchers and engineers can use this benchmark to:

diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py
index 62787dcf..4ea89322 100644
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@@ -25,18 +25,23 @@ NCNN | `ncnn` | yolov8n_ncnn_model/
 """

 import glob
+import os
 import platform
+import re
+import shutil
 import time
 from pathlib import Path

 import numpy as np
 import torch.cuda
+import yaml

 from ultralytics import YOLO, YOLOWorld
 from ultralytics.cfg import TASK2DATA, TASK2METRIC
 from ultralytics.engine.exporter import export_formats
 from ultralytics.utils import ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR
 from ultralytics.utils.checks import IS_PYTHON_3_12, check_requirements, check_yolo
+from ultralytics.utils.downloads import safe_download
 from ultralytics.utils.files import file_size
 from ultralytics.utils.torch_utils import select_device

@@ -152,6 +157,133 @@ def benchmark(
     return df


+class RF100Benchmark:
+    """Benchmark YOLO model performance across the datasets in the Roboflow 100 collection."""
+
+    def __init__(self):
+        """Initialize the RF100Benchmark class with empty dataset lists and the validation metric names."""
+        self.ds_names = []
+        self.ds_cfg_list = []
+        self.rf = None
+        self.val_metrics = ["class", "images", "targets", "precision", "recall", "map50", "map95"]
+
+    def set_key(self, api_key):
+        """
+        Set the Roboflow API key for processing.
+
+        Args:
+            api_key (str): The Roboflow API key.
+        """
+        check_requirements("roboflow")
+        from roboflow import Roboflow
+
+        self.rf = Roboflow(api_key=api_key)
+
+    def parse_dataset(self, ds_link_txt="datasets_links.txt"):
+        """
+        Parse dataset links and download the datasets.
+
+        Args:
+            ds_link_txt (str): Path to the dataset links file.
+
+        Returns:
+            (tuple): Dataset names and paths to their config YAML files.
+        """
+        # Start from a clean working directory
+        if os.path.exists("rf-100"):
+            shutil.rmtree("rf-100")
+        os.mkdir("rf-100")
+        os.chdir("rf-100")
+        os.mkdir("ultralytics-benchmarks")
+        safe_download("https://ultralytics.com/assets/datasets_links.txt")
+
+        with open(ds_link_txt, "r") as file:
+            for line in file:
+                try:
+                    _, url, workspace, project, version = re.split("/+", line.strip())
+                    self.ds_names.append(project)
+                    proj_version = f"{project}-{version}"
+                    if not Path(proj_version).exists():
+                        self.rf.workspace(workspace).project(project).version(version).download("yolov8")
+                    else:
+                        print("Dataset already downloaded.")
+                    self.ds_cfg_list.append(Path.cwd() / proj_version / "data.yaml")
+                except Exception:
+                    continue
+
+        return self.ds_names, self.ds_cfg_list
+
+    def fix_yaml(self, path):
+        """
+        Fix the train and validation paths in a dataset's YAML file.
+
+        Args:
+            path (str): YAML file path.
+        """
+        with open(path, "r") as file:
+            yaml_data = yaml.safe_load(file)
+        yaml_data["train"] = "train/images"
+        yaml_data["val"] = "valid/images"
+        with open(path, "w") as file:
+            yaml.safe_dump(yaml_data, file)
+
+    def evaluate(self, yaml_path, val_log_file, eval_log_file, list_ind):
+        """
+        Evaluate model performance on validation results and append the mAP50 value to the evaluation log.
+
+        Args:
+            yaml_path (str): Dataset YAML file path.
+            val_log_file (str): Path to the validation log file.
+            eval_log_file (str): Path to the evaluation log file.
+            list_ind (int): Index of the current dataset in `ds_names`.
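+
+        Examples:
+            A minimal illustrative call, assuming `parse_dataset` has already populated `ds_names` and
+            the console output of a validation run was redirected to the log file (paths hypothetical):
+            >>> benchmark.evaluate("rf-100/my-project-2/data.yaml", "val_log.txt", "eval_log.txt", 0)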
+        """
+        skip_symbols = ["🚀", "⚠️", "💡", "❌"]
+        with open(yaml_path) as stream:
+            class_names = yaml.safe_load(stream)["names"]
+        eval_lines = []
+        with open(val_log_file, "r", encoding="utf-8") as f:
+            for line in f:
+                # Skip decorated status lines, e.g. download or warning messages
+                if any(symbol in line for symbol in skip_symbols):
+                    continue
+                # Split the row into whitespace-free entries
+                entries = [e.strip() for e in line.split(" ") if e.strip()]
+                if len(entries) < 7:
+                    continue
+                # Keep the aggregate "all" row (skipping rows marked "(AP)" or "(AR)") and per-class rows
+                if (entries[0] == "all" and "(AP)" not in entries and "(AR)" not in entries) or entries[0] in class_names:
+                    eval_lines.append(
+                        {
+                            "class": entries[0],
+                            "images": entries[1],
+                            "targets": entries[2],
+                            "precision": entries[3],
+                            "recall": entries[4],
+                            "map50": entries[5],
+                            "map95": entries[6],
+                        }
+                    )
+
+        # Prefer the aggregate "all" row when multiple rows were parsed
+        map_val = 0.0
+        if len(eval_lines) > 1:
+            print("Multiple results parsed")
+            for lst in eval_lines:
+                if lst["class"] == "all":
+                    map_val = lst["map50"]
+        else:
+            print("Single result parsed")
+            map_val = eval_lines[0]["map50"]
+
+        with open(eval_log_file, "a") as f:
+            f.write(f"{self.ds_names[list_ind]}: {map_val}\n")
+
+
 class ProfileModels:
     """
     ProfileModels class for profiling different models on ONNX and TensorRT.