ultralytics 8.1.39 add YOLO-World training (#9268)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
parent 18036908d4
commit e9187c1296
34 changed files with 2166 additions and 100 deletions
@@ -1,15 +1,31 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 from .base import BaseDataset
-from .build import build_dataloader, build_yolo_dataset, load_inference_source
-from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
+from .build import (
+    build_dataloader,
+    build_yolo_dataset,
+    build_grounding,
+    load_inference_source,
+)
+from .dataset import (
+    ClassificationDataset,
+    SemanticDataset,
+    YOLODataset,
+    YOLOMultiModalDataset,
+    GroundingDataset,
+    YOLOConcatDataset,
+)
 
 __all__ = (
     "BaseDataset",
     "ClassificationDataset",
     "SemanticDataset",
     "YOLODataset",
+    "YOLOMultiModalDataset",
+    "YOLOConcatDataset",
+    "GroundingDataset",
     "build_yolo_dataset",
+    "build_grounding",
     "build_dataloader",
     "load_inference_source",
 )
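The widened export list is the point of this hunk: everything needed for YOLO-World training is now reachable from the package root. A quick smoke test of the new surface (assumes an ultralytics install that contains this commit):

# Smoke test of the expanded ultralytics.data surface (assumes this commit is installed).
from ultralytics.data import (
    GroundingDataset,
    YOLOConcatDataset,
    YOLOMultiModalDataset,
    build_grounding,
    build_yolo_dataset,
)

print(YOLOMultiModalDataset.__bases__[0].__name__)  # YOLODataset: the new classes extend the YOLO pipeline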
@@ -3,6 +3,7 @@
 import math
 import random
 from copy import deepcopy
+from typing import Tuple, Union
 
 import cv2
 import numpy as np
@@ -66,7 +67,7 @@ class Compose:
 
     def __init__(self, transforms):
         """Initializes the Compose object with a list of transforms."""
-        self.transforms = transforms
+        self.transforms = transforms if isinstance(transforms, list) else [transforms]
 
     def __call__(self, data):
         """Applies a series of transformations to input data."""
@@ -78,6 +79,29 @@ class Compose:
         """Appends a new transform to the existing list of transforms."""
         self.transforms.append(transform)
 
+    def insert(self, index, transform):
+        """Inserts a new transform into the existing list of transforms."""
+        self.transforms.insert(index, transform)
+
+    def __getitem__(self, index: Union[list, int]) -> "Compose":
+        """Retrieve a specific transform or a set of transforms using indexing."""
+        assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
+        index = [index] if isinstance(index, int) else index
+        return Compose([self.transforms[i] for i in index])
+
+    def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
+        """Assign a specific transform or a set of transforms using indexing."""
+        assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
+        if isinstance(index, list):
+            assert isinstance(
+                value, list
+            ), f"The indices should be the same type as values, but got {type(index)} and {type(value)}"
+        if isinstance(index, int):
+            index, value = [index], [value]
+        for i, v in zip(index, value):
+            assert i < len(self.transforms), f"list index {i} out of range {len(self.transforms)}."
+            self.transforms[i] = v
+
+    def tolist(self):
+        """Converts the list of transforms to a standard Python list."""
+        return self.transforms
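These list-style hooks are what later let the dataset classes splice a `RandomLoadText` stage into an already-built pipeline via `transforms.insert(-1, ...)`. A toy sketch of the semantics, with plain lambdas standing in for real augmentations:

# Toy sketch of Compose indexing; lambdas stand in for real transforms.
from ultralytics.data.augment import Compose

pipeline = Compose([lambda x: x * 2, lambda x: x + 1])
pipeline.insert(1, lambda x: x - 3)  # pipeline is now: double, minus-three, increment
sub = pipeline[[0, 2]]               # indexing returns a new Compose([double, increment])
pipeline[2] = lambda x: x + 10       # replace the final transform in place

print(pipeline(2))  # (2 * 2 - 3) + 10 = 11
print(sub(2))       # (2 * 2) + 1 = 5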
@@ -118,6 +142,8 @@ class BaseMixTransform:
             mix_labels[i] = self.pre_transform(data)
         labels["mix_labels"] = mix_labels
 
+        # Update cls and texts
+        labels = self._update_label_text(labels)
         # Mosaic or MixUp
         labels = self._mix_transform(labels)
         labels.pop("mix_labels", None)
@@ -131,6 +157,22 @@ class BaseMixTransform:
         """Gets a list of shuffled indexes for mosaic augmentation."""
         raise NotImplementedError
 
+    def _update_label_text(self, labels):
+        """Update label text."""
+        if "texts" not in labels:
+            return labels
+
+        mix_texts = sum([labels["texts"]] + [x["texts"] for x in labels["mix_labels"]], [])
+        mix_texts = list({tuple(x) for x in mix_texts})
+        text2id = {text: i for i, text in enumerate(mix_texts)}
+
+        for label in [labels] + labels["mix_labels"]:
+            for i, l in enumerate(label["cls"].squeeze(-1).tolist()):
+                text = label["texts"][int(l)]
+                label["cls"][i] = text2id[tuple(text)]
+            label["texts"] = mix_texts
+        return labels
+
 
 class Mosaic(BaseMixTransform):
     """
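Concretely, `_update_label_text` builds one deduplicated vocabulary across the base image and its mix images, then rewrites every `cls` index against it. A worked miniature of the same merge logic (hypothetical label dicts, stripped of the instances and image fields the real ones carry):

# Miniature of the merge: two images with overlapping vocabularies.
import numpy as np

base = {"cls": np.array([[0], [1]]), "texts": [["person"], ["dog"]]}
mixed = {"cls": np.array([[0]]), "texts": [["dog"], ["cat"]]}
base["mix_labels"] = [mixed]

mix_texts = sum([base["texts"]] + [x["texts"] for x in base["mix_labels"]], [])
mix_texts = list({tuple(x) for x in mix_texts})  # deduplicated; order is arbitrary
text2id = {text: i for i, text in enumerate(mix_texts)}

for label in [base] + base["mix_labels"]:
    for i, c in enumerate(label["cls"].squeeze(-1).tolist()):
        label["cls"][i] = text2id[tuple(label["texts"][int(c)])]
    label["texts"] = mix_texts

print(base["cls"].ravel(), mixed["cls"].ravel())  # both now index the shared mix_texts list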
@@ -320,6 +362,8 @@ class Mosaic(BaseMixTransform):
         final_labels["instances"].clip(imgsz, imgsz)
         good = final_labels["instances"].remove_zero_area_boxes()
         final_labels["cls"] = final_labels["cls"][good]
+        if "texts" in mosaic_labels[0]:
+            final_labels["texts"] = mosaic_labels[0]["texts"]
         return final_labels
@@ -970,6 +1014,83 @@ class Format:
         return masks, instances, cls
 
 
+class RandomLoadText:
+    """
+    Randomly samples positive and negative texts and updates the class indices according to the number of samples.
+
+    Attributes:
+        prompt_format (str): Format string for the prompt. Default is '{}'.
+        neg_samples (tuple[int]): A range from which to randomly sample the number of negative texts. Default is (80, 80).
+        max_samples (int): The maximum number of different text samples in one image. Default is 80.
+        padding (bool): Whether to pad texts to max_samples. Default is False.
+        padding_value (str): The padding text. Default is "".
+    """
+
+    def __init__(
+        self,
+        prompt_format: str = "{}",
+        neg_samples: Tuple[int, int] = (80, 80),
+        max_samples: int = 80,
+        padding: bool = False,
+        padding_value: str = "",
+    ) -> None:
+        """Initializes the RandomLoadText class with given parameters."""
+        self.prompt_format = prompt_format
+        self.neg_samples = neg_samples
+        self.max_samples = max_samples
+        self.padding = padding
+        self.padding_value = padding_value
+
+    def __call__(self, labels: dict) -> dict:
+        """Return updated classes and texts."""
+        assert "texts" in labels, "No texts found in labels."
+        class_texts = labels["texts"]
+        num_classes = len(class_texts)
+        cls = np.asarray(labels.pop("cls"), dtype=int)
+        pos_labels = np.unique(cls).tolist()
+
+        if len(pos_labels) > self.max_samples:
+            pos_labels = random.sample(pos_labels, k=self.max_samples)
+
+        neg_samples = min(min(num_classes, self.max_samples) - len(pos_labels), random.randint(*self.neg_samples))
+        neg_labels = []
+        for i in range(num_classes):
+            if i not in pos_labels:
+                neg_labels.append(i)
+        neg_labels = random.sample(neg_labels, k=neg_samples)
+
+        sampled_labels = pos_labels + neg_labels
+        random.shuffle(sampled_labels)
+
+        label2ids = {label: i for i, label in enumerate(sampled_labels)}
+        valid_idx = np.zeros(len(labels["instances"]), dtype=bool)
+        new_cls = []
+        for i, label in enumerate(cls.squeeze(-1).tolist()):
+            if label not in label2ids:
+                continue
+            valid_idx[i] = True
+            new_cls.append([label2ids[label]])
+        labels["instances"] = labels["instances"][valid_idx]
+        labels["cls"] = np.array(new_cls)
+
+        # Randomly select one prompt when there is more than one prompt
+        texts = []
+        for label in sampled_labels:
+            prompts = class_texts[label]
+            assert len(prompts) > 0
+            prompt = self.prompt_format.format(prompts[random.randrange(len(prompts))])
+            texts.append(prompt)
+
+        if self.padding:
+            valid_labels = len(pos_labels) + len(neg_labels)
+            num_padding = self.max_samples - valid_labels
+            if num_padding > 0:
+                texts += [self.padding_value] * num_padding
+
+        labels["texts"] = texts
+        return labels
+
+
 def v8_transforms(dataset, imgsz, hyp, stretch=False):
     """Convert images to a size suitable for YOLOv8 training."""
     pre_transform = Compose(
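The effect of `RandomLoadText` is easiest to see on a toy labels dict. In the sketch below a plain ndarray stands in for the real `Instances` object (both support `len()` and boolean-mask indexing), and the class list is shrunk from 80 to 4:

# Toy labels dict; a plain ndarray stands in for the real Instances object.
import numpy as np
from ultralytics.data.augment import RandomLoadText

labels = {
    "texts": [["person"], ["bicycle", "bike"], ["car"], ["dog"]],
    "cls": np.array([[0], [2]]),  # two boxes: a person and a car
    "instances": np.zeros((2, 4), dtype=np.float32),  # placeholder boxes
}

t = RandomLoadText(prompt_format="a photo of a {}", neg_samples=(1, 1), max_samples=4)
out = t(labels)

print(out["texts"])  # e.g. ['a photo of a car', 'a photo of a bike', 'a photo of a person']
print(out["cls"])    # box classes remapped to indices into the sampled texts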
@@ -22,7 +22,7 @@ from ultralytics.data.loaders import (
 from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.utils import RANK, colorstr
 from ultralytics.utils.checks import check_file
-from .dataset import YOLODataset
+from .dataset import YOLODataset, YOLOMultiModalDataset, GroundingDataset
 from .utils import PIN_MEMORY
@@ -82,9 +82,10 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)
 
 
-def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
+def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32, multi_modal=False):
     """Build YOLO Dataset."""
-    return YOLODataset(
+    dataset = YOLOMultiModalDataset if multi_modal else YOLODataset
+    return dataset(
         img_path=img_path,
         imgsz=cfg.imgsz,
         batch_size=batch,
@@ -103,6 +104,27 @@ def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
     )
 
 
+def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, stride=32):
+    """Build a GroundingDataset from a COCO-style grounding JSON file."""
+    return GroundingDataset(
+        img_path=img_path,
+        json_file=json_file,
+        imgsz=cfg.imgsz,
+        batch_size=batch,
+        augment=mode == "train",  # augmentation
+        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
+        rect=cfg.rect or rect,  # rectangular batches
+        cache=cfg.cache or None,
+        single_cls=cfg.single_cls or False,
+        stride=int(stride),
+        pad=0.0 if mode == "train" else 0.5,
+        prefix=colorstr(f"{mode}: "),
+        task=cfg.task,
+        classes=cfg.classes,
+        fraction=cfg.fraction if mode == "train" else 1.0,
+    )
+
+
 def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
     """Return an InfiniteDataLoader or DataLoader for training or validation set."""
     batch = min(batch, len(dataset))
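Both builders consume the same `cfg` namespace the trainer passes around. A hedged sketch of the wiring; the `get_cfg()` defaults and the grounding paths below are placeholders, not files shipped with this commit:

# Hypothetical wiring; dataset paths are placeholders.
from ultralytics.cfg import get_cfg
from ultralytics.data import build_grounding, build_yolo_dataset
from ultralytics.data.utils import check_det_dataset

cfg = get_cfg()  # default train args: imgsz, rect, cache, fraction, ...
data = check_det_dataset("coco8.yaml")

# Detection dataset that also yields per-class texts (YOLOMultiModalDataset):
mm_ds = build_yolo_dataset(cfg, data["train"], batch=16, data=data, mode="train", multi_modal=True)

# Grounding dataset driven by caption token spans instead of a fixed class list:
g_ds = build_grounding(cfg, img_path="path/to/grounding/images", json_file="path/to/annotations.json", batch=16)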
@@ -219,6 +219,7 @@ def convert_coco(
     use_segments=False,
     use_keypoints=False,
     cls91to80=True,
+    lvis=False,
 ):
     """
     Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
@@ -229,12 +230,14 @@ def convert_coco(
         use_segments (bool, optional): Whether to include segmentation masks in the output.
         use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
         cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.
+        lvis (bool, optional): Whether to convert data in the LVIS dataset format.
 
     Example:
         ```python
         from ultralytics.data.converter import convert_coco
 
         convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
+        convert_coco('../datasets/lvis/annotations/', use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
         ```
 
     Output:
@@ -251,8 +254,14 @@ def convert_coco(
 
     # Import json
     for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
-        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
+        lname = "" if lvis else json_file.stem.replace("instances_", "")
+        fn = Path(save_dir) / "labels" / lname  # folder name
         fn.mkdir(parents=True, exist_ok=True)
+        if lvis:
+            # NOTE: create folders for both train and val in advance,
+            # since LVIS val set contains images from COCO 2017 train in addition to the COCO 2017 val split.
+            (fn / "train2017").mkdir(parents=True, exist_ok=True)
+            (fn / "val2017").mkdir(parents=True, exist_ok=True)
         with open(json_file) as f:
             data = json.load(f)
@@ -263,16 +272,20 @@ def convert_coco(
         for ann in data["annotations"]:
             imgToAnns[ann["image_id"]].append(ann)
 
+        image_txt = []
         # Write labels file
        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
             img = images[f"{img_id:d}"]
-            h, w, f = img["height"], img["width"], img["file_name"]
+            h, w = img["height"], img["width"]
+            f = str(Path(img["coco_url"]).relative_to("http://images.cocodataset.org")) if lvis else img["file_name"]
+            if lvis:
+                image_txt.append(str(Path("./images") / f))
 
             bboxes = []
             segments = []
             keypoints = []
             for ann in anns:
-                if ann["iscrowd"]:
+                if ann.get("iscrowd", False):
                     continue
                 # The COCO box format is [top left x, top left y, width, height]
                 box = np.array(ann["bbox"], dtype=np.float64)
@@ -314,7 +327,12 @@ def convert_coco(
                     )  # cls, box or segments
                 file.write(("%g " * len(line)).rstrip() % line + "\n")
 
-    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")
+        if lvis:
+            with open((Path(save_dir) / json_file.name.replace("lvis_v1_", "").replace(".json", ".txt")), "a") as f:
+                for l in image_txt:
+                    f.write(f"{l}\n")
+
+    LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
 
 
 def convert_dota_to_yolo_obb(dota_root_path: str):
@@ -1,20 +1,41 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 import contextlib
 from itertools import repeat
+from collections import defaultdict
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 
 import cv2
+import json
 import numpy as np
 import torch
 import torchvision
 from PIL import Image
 
-from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
+from torch.utils.data import ConcatDataset
+from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr
 from ultralytics.utils.ops import resample_segments
-from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
+from .augment import (
+    Compose,
+    Format,
+    Instances,
+    LetterBox,
+    RandomLoadText,
+    classify_augmentations,
+    classify_transforms,
+    v8_transforms,
+)
 from .base import BaseDataset
-from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label
+from .utils import (
+    HELP_URL,
+    LOGGER,
+    get_hash,
+    img2label_paths,
+    verify_image,
+    verify_image_label,
+    load_dataset_cache_file,
+    save_dataset_cache_file,
+)
 
 # Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
 DATASET_CACHE_VERSION = "1.0.3"
@@ -105,7 +126,7 @@ class YOLODataset(BaseDataset):
         x["hash"] = get_hash(self.label_files + self.im_files)
         x["results"] = nf, nm, ne, nc, len(self.im_files)
         x["msgs"] = msgs  # warnings
-        save_dataset_cache_file(self.prefix, path, x)
+        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return x
 
     def get_labels(self):
@@ -339,31 +360,125 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
         x["hash"] = get_hash([x[0] for x in self.samples])
         x["results"] = nf, nc, len(samples), samples
         x["msgs"] = msgs  # warnings
-        save_dataset_cache_file(self.prefix, path, x)
+        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return samples
 
 
-def load_dataset_cache_file(path):
-    """Load an Ultralytics *.cache dictionary from path."""
-    import gc
-
-    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
-    cache = np.load(str(path), allow_pickle=True).item()  # load dict
-    gc.enable()
-    return cache
-
-
-def save_dataset_cache_file(prefix, path, x):
-    """Save an Ultralytics dataset *.cache dictionary x to path."""
-    x["version"] = DATASET_CACHE_VERSION  # add cache version
-    if is_dir_writeable(path.parent):
-        if path.exists():
-            path.unlink()  # remove *.cache file if exists
-        np.save(str(path), x)  # save cache for next time
-        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
-        LOGGER.info(f"{prefix}New cache created: {path}")
-    else:
-        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
+class YOLOMultiModalDataset(YOLODataset):
+    """
+    Dataset class for loading object detection and/or segmentation labels in YOLO format.
+
+    Args:
+        data (dict, optional): A dataset YAML dictionary. Defaults to None.
+        task (str): An explicit arg to specify the current task. Defaults to 'detect'.
+
+    Returns:
+        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
+    """
+
+    def __init__(self, *args, data=None, task="detect", **kwargs):
+        """Initializes a dataset object for object detection tasks with optional specifications."""
+        super().__init__(*args, data=data, task=task, **kwargs)
+
+    def update_labels_info(self, label):
+        """Add texts information for multi-modal model training."""
+        labels = super().update_labels_info(label)
+        # NOTE: some categories are concatenated with their synonyms by `/`.
+        labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
+        return labels
+
+    def build_transforms(self, hyp=None):
+        """Enhances data transformations with optional text augmentation for multi-modal training."""
+        transforms = super().build_transforms(hyp)
+        if self.augment:
+            # NOTE: hard-coded the args for now.
+            transforms.insert(-1, RandomLoadText(max_samples=min(self.data["nc"], 80), padding=True))
+        return transforms
+
+
+class GroundingDataset(YOLODataset):
+    def __init__(self, *args, task="detect", json_file, **kwargs):
+        """Initializes a GroundingDataset for object detection, loading annotations from a specified JSON file."""
+        assert task == "detect", "`GroundingDataset` only supports `detect` task for now!"
+        self.json_file = json_file
+        super().__init__(*args, task=task, data={}, **kwargs)
+
+    def get_img_files(self, img_path):
+        """The image files are read in the `get_labels` function; return an empty list here."""
+        return []
+
+    def get_labels(self):
+        """Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image."""
+        labels = []
+        LOGGER.info("Loading annotation file...")
+        with open(self.json_file, "r") as f:
+            annotations = json.load(f)
+        images = {f'{x["id"]:d}': x for x in annotations["images"]}
+        imgToAnns = defaultdict(list)
+        for ann in annotations["annotations"]:
+            imgToAnns[ann["image_id"]].append(ann)
+        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Reading annotations {self.json_file}"):
+            img = images[f"{img_id:d}"]
+            h, w, f = img["height"], img["width"], img["file_name"]
+            im_file = Path(self.img_path) / f
+            if not im_file.exists():
+                continue
+            self.im_files.append(str(im_file))
+            bboxes = []
+            cat2id = {}
+            texts = []
+            for ann in anns:
+                if ann["iscrowd"]:
+                    continue
+                box = np.array(ann["bbox"], dtype=np.float32)
+                box[:2] += box[2:] / 2
+                box[[0, 2]] /= float(w)
+                box[[1, 3]] /= float(h)
+                if box[2] <= 0 or box[3] <= 0:
+                    continue
+
+                cat_name = " ".join([img["caption"][t[0] : t[1]] for t in ann["tokens_positive"]])
+                if cat_name not in cat2id:
+                    cat2id[cat_name] = len(cat2id)
+                    texts.append([cat_name])
+                cls = cat2id[cat_name]  # class
+                box = [cls] + box.tolist()
+                if box not in bboxes:
+                    bboxes.append(box)
+            lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)
+            labels.append(
+                dict(
+                    im_file=im_file,
+                    shape=(h, w),
+                    cls=lb[:, 0:1],  # n, 1
+                    bboxes=lb[:, 1:],  # n, 4
+                    normalized=True,
+                    bbox_format="xywh",
+                    texts=texts,
+                )
+            )
+        return labels
+
+    def build_transforms(self, hyp=None):
+        """Configures augmentations for training with optional text loading; `hyp` adjusts augmentation intensity."""
+        transforms = super().build_transforms(hyp)
+        if self.augment:
+            # NOTE: hard-coded the args for now.
+            transforms.insert(-1, RandomLoadText(max_samples=80, padding=True))
+        return transforms
+
+
+class YOLOConcatDataset(ConcatDataset):
+    """
+    Dataset as a concatenation of multiple datasets.
+
+    This class is useful to assemble different existing datasets.
+    """
+
+    @staticmethod
+    def collate_fn(batch):
+        """Collates data samples into batches."""
+        return YOLODataset.collate_fn(batch)
+
+
 # TODO: support semantic segmentation
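`YOLOConcatDataset` is the glue for mixed-source training: detection data with texts and grounding data can feed one loader because every member yields the same label schema and the shared `YOLODataset.collate_fn`. Continuing the hypothetical `mm_ds`/`g_ds` objects from the build sketch above:

# Continues the hypothetical mm_ds/g_ds objects from the earlier build sketch.
from ultralytics.data import YOLOConcatDataset, build_dataloader

train_ds = YOLOConcatDataset([mm_ds, g_ds])
loader = build_dataloader(train_ds, batch=16, workers=4, shuffle=True, rank=-1)

batch = next(iter(loader))
print(type(batch["img"]), len(batch["texts"]))  # stacked image tensor + per-sample text lists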
@@ -29,6 +29,7 @@ from ultralytics.utils import (
     emojis,
     yaml_load,
     yaml_save,
+    is_dir_writeable,
 )
 from ultralytics.utils.checks import check_file, check_font, is_ascii
 from ultralytics.utils.downloads import download, safe_download, unzip_file
@@ -303,7 +304,7 @@ def check_det_dataset(dataset, autodownload=True):
 
     # Set paths
     data["path"] = path  # download scripts
-    for k in "train", "val", "test":
+    for k in "train", "val", "test", "minival":
         if data.get(k):  # prepend path
             if isinstance(data[k], str):
                 x = (path / data[k]).resolve()
@@ -649,3 +650,26 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
         if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
             with open(path.parent / txt[i], "a") as f:
                 f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n")  # add image to txt file
+
+
+def load_dataset_cache_file(path):
+    """Load an Ultralytics *.cache dictionary from path."""
+    import gc
+
+    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
+    cache = np.load(str(path), allow_pickle=True).item()  # load dict
+    gc.enable()
+    return cache
+
+
+def save_dataset_cache_file(prefix, path, x, version):
+    """Save an Ultralytics dataset *.cache dictionary x to path."""
+    x["version"] = version  # add cache version
+    if is_dir_writeable(path.parent):
+        if path.exists():
+            path.unlink()  # remove *.cache file if exists
+        np.save(str(path), x)  # save cache for next time
+        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
+        LOGGER.info(f"{prefix}New cache created: {path}")
+    else:
+        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
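These helpers moved here from `dataset.py`, and `save_dataset_cache_file` now takes the version stamp as an argument instead of reading a module-level constant, so each caller can version its own cache format. A round-trip sketch with a toy payload and a temporary path:

# Round-trip sketch: save a toy cache dict, then load it back.
from pathlib import Path
from tempfile import TemporaryDirectory

from ultralytics.data.utils import load_dataset_cache_file, save_dataset_cache_file

with TemporaryDirectory() as tmp:
    cache_path = Path(tmp) / "labels.cache"
    save_dataset_cache_file("demo: ", cache_path, {"hash": "abc123", "msgs": []}, version="1.0.3")
    cache = load_dataset_cache_file(cache_path)
    assert cache["version"] == "1.0.3"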