New create_synthetic_coco_dataset function (#16742)
Signed-off-by: UltralyticsAssistant <web@ultralytics.com>
parent d88e57f143
commit 53e5d02a28
2 changed files with 68 additions and 6 deletions

Changed file 1 of 2: reference docs page for ultralytics.data.converter

@@ -41,4 +41,8 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme
 
 ## ::: ultralytics.data.converter.yolo_bbox2segment
 
+<br><br><hr><br>
+
+## ::: ultralytics.data.converter.create_synthetic_coco_dataset
+
 <br><br>
Changed file 2 of 2: the ultralytics.data.converter module

@@ -1,13 +1,18 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 import json
+import random
+import shutil
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import cv2
 import numpy as np
+from PIL import Image
 
-from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM
+from ultralytics.utils.downloads import download
 from ultralytics.utils.files import increment_path
 
 
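For orientation, a short sketch of what the updated import line brings in; the names are taken from the hunk above, but the one-line descriptions are my reading of how they are used further down in this diff, not quotes from the library:

# Illustration only: names come from the import hunk above; the comments
# describe how they are used later in this diff.
from ultralytics.utils import DATASETS_DIR, NUM_THREADS
from ultralytics.utils.downloads import download

print(DATASETS_DIR)  # root directory under which the synthetic "coco" dataset is created
print(NUM_THREADS)   # worker count passed to ThreadPoolExecutor in the new function
# download() is the helper used below to fetch the coco2017labels-segments.zip archive.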
@@ -588,15 +593,13 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
 
             - im_dir
                 ├─ 001.jpg
-                ├─ ..
+                ├─ ...
                 └─ NNN.jpg
             - labels
                 ├─ 001.txt
-                ├─ ..
+                ├─ ...
                 └─ NNN.txt
     """
-    from tqdm import tqdm
-
     from ultralytics import SAM
     from ultralytics.data import YOLODataset
     from ultralytics.utils import LOGGER
@@ -610,7 +613,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
 
     LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
     sam_model = SAM(sam_model)
-    for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
+    for label in TQDM(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
         h, w = label["shape"]
         boxes = label["bboxes"]
         if len(boxes) == 0:  # skip empty labels
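For context, a minimal usage sketch of yolo_bbox2segment, the function the hunks above modify; the signature is taken from the hunk headers, while the input path is a placeholder:

from ultralytics.data.converter import yolo_bbox2segment

# Placeholder path: im_dir is expected to follow the images/labels layout shown
# in the docstring above; generated segment labels are reported via save_dir
# (a default location is derived when it is None).
yolo_bbox2segment(im_dir="path/to/dataset/images", save_dir=None, sam_model="sam_b.pt")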
@@ -635,3 +638,58 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
         with open(txt_file, "a") as f:
             f.writelines(text + "\n" for text in texts)
     LOGGER.info(f"Generated segment labels saved in {save_dir}")
+
+
+def create_synthetic_coco_dataset():
+    """
+    Creates a synthetic COCO dataset with random images and existing labels.
+
+    This function downloads COCO labels, creates synthetic images for train2017 and val2017 subsets, and organizes
+    them in the COCO dataset structure. It uses multithreading to generate images efficiently.
+
+    Examples:
+        >>> create_synthetic_coco_dataset()
+
+    Notes:
+        - Requires internet connection to download label files.
+        - Generates random RGB images of varying sizes (480x480 to 640x640 pixels).
+        - Existing test2017 directory is removed as it's not needed.
+        - If label directories don't exist, image creation for that subset is skipped.
+    """
+
+    def create_synthetic_image(image_file):
+        """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
+        if not image_file.exists():
+            size = (random.randint(480, 640), random.randint(480, 640))
+            Image.new(
+                "RGB",
+                size=size,
+                color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
+            ).save(image_file)
+
+    # Download labels
+    dir = DATASETS_DIR / "coco"
+    url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/"
+    label_zip = "coco2017labels-segments.zip"
+    download([url + label_zip], dir=dir.parent)
+
+    # Create synthetic images
+    shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True)  # Remove test2017 directory as not needed
+    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
+        for subset in ["train2017", "val2017"]:
+            subset_dir = dir / "images" / subset
+            subset_dir.mkdir(parents=True, exist_ok=True)
+
+            label_dir = dir / "labels" / subset
+            if label_dir.exists():
+                label_files = list(label_dir.glob("*.txt"))
+                image_files = [subset_dir / f"{label_file.stem}.jpg" for label_file in label_files]
+
+                # Submit all tasks
+                futures = [executor.submit(create_synthetic_image, image_file) for image_file in image_files]
+                for _ in TQDM(as_completed(futures), total=len(futures), desc=f"Generating images for {subset}"):
+                    pass  # The actual work is done in the background
+            else:
+                print(f"Warning: Label directory {label_dir} does not exist. Skipping image creation for {subset}.")
+
+    print("Synthetic COCO dataset created successfully.")
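To close, a minimal sketch of how the new helper might be exercised once this change lands; the import path follows the docs hunk above and the directory layout mirrors the function body, but treat it as an illustration rather than a tested recipe:

from ultralytics.data.converter import create_synthetic_coco_dataset
from ultralytics.utils import DATASETS_DIR

# Downloads the COCO 2017 segment labels and fills images/train2017 and
# images/val2017 with randomly coloured placeholder JPEGs (one per label file).
create_synthetic_coco_dataset()

# Sanity check: count the generated placeholder images.
coco_dir = DATASETS_DIR / "coco"
for subset in ("train2017", "val2017"):
    n_images = len(list((coco_dir / "images" / subset).glob("*.jpg")))
    print(f"{subset}: {n_images} synthetic images")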