ultralytics 8.0.235 YOLOv8 OBB train, val, predict and export (#4499)
Co-authored-by: Yash Khurana <ykhurana6@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Swamita Gupta <swamita2001@gmail.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Laughing-q <1182102784@qq.com>
parent f702b34a50
commit 072291bc78
52 changed files with 2090 additions and 524 deletions
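As context for the diff below: this release wires the new OBB (oriented bounding box) task into the same train, val, predict and export workflow as the other YOLOv8 tasks. A minimal sketch, assuming the standard Ultralytics Python API; the model name 'yolov8n-obb.pt' and dataset YAML 'DOTAv2.yaml' are illustrative:

    from ultralytics import YOLO

    model = YOLO('yolov8n-obb.pt')               # OBB variant of the usual weights (name assumed)
    model.train(data='DOTAv2.yaml', imgsz=1024)  # train on a DOTA-style OBB dataset
    model.val()                                  # validate on the val split
    model.predict('image.jpg')                   # rotated-box predictions
    model.export(format='onnx')                  # export works as for the other tasks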
ultralytics/data/augment.py

@@ -13,7 +13,7 @@ from ultralytics.utils import LOGGER, colorstr
 from ultralytics.utils.checks import check_version
 from ultralytics.utils.instance import Instances
 from ultralytics.utils.metrics import bbox_ioa
-from ultralytics.utils.ops import segment2box
+from ultralytics.utils.ops import segment2box, xyxyxyxy2xywhr
 from ultralytics.utils.torch_utils import TORCHVISION_0_10, TORCHVISION_0_11, TORCHVISION_0_13

 from .utils import polygons2masks, polygons2masks_overlap
@@ -485,6 +485,8 @@ class RandomPerspective:
         xy = xy[:, :2] / xy[:, 2:3]
         segments = xy.reshape(n, -1, 2)
         bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0)
+        segments[..., 0] = segments[..., 0].clip(bboxes[:, 0:1], bboxes[:, 2:3])
+        segments[..., 1] = segments[..., 1].clip(bboxes[:, 1:2], bboxes[:, 3:4])
         return bboxes, segments

     def apply_keypoints(self, keypoints, M):
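The two added lines clip every segment point into its own box via NumPy broadcasting: bboxes[:, 0:1] has shape (N, 1), so each of the N segments is clipped against its own x-limits across all of its points at once. A standalone sketch of that broadcast (the array values are made up):

    import numpy as np

    segments = np.array([[[-5.0, 3.0], [12.0, 3.0]]])  # (1 segment, 2 points, xy)
    bboxes = np.array([[0.0, 0.0, 10.0, 10.0]])        # (1 box, xyxy)
    # (N, M) x-coords clipped against (N, 1) per-box limits, broadcast over points
    segments[..., 0] = segments[..., 0].clip(bboxes[:, 0:1], bboxes[:, 2:3])
    print(segments[..., 0])  # [[ 0. 10.]] -- both points pulled inside the box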
@@ -891,6 +893,7 @@ class Format:
                  normalize=True,
                  return_mask=False,
                  return_keypoint=False,
+                 return_obb=False,
                  mask_ratio=4,
                  mask_overlap=True,
                  batch_idx=True):
@@ -899,6 +902,7 @@ class Format:
         self.normalize = normalize
         self.return_mask = return_mask  # set False when training detection only
         self.return_keypoint = return_keypoint
+        self.return_obb = return_obb
         self.mask_ratio = mask_ratio
         self.mask_overlap = mask_overlap
         self.batch_idx = batch_idx  # keep the batch indexes
@@ -928,6 +932,9 @@ class Format:
         labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
         if self.return_keypoint:
             labels['keypoints'] = torch.from_numpy(instances.keypoints)
+        if self.return_obb:
+            labels['bboxes'] = xyxyxyxy2xywhr(torch.from_numpy(instances.segments)) if len(
+                instances.segments) else torch.zeros((0, 5))
         # Then we can use collate_fn
         if self.batch_idx:
             labels['batch_idx'] = torch.zeros(nl)
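For OBB, Format re-labels 'bboxes' as 5-tuples (cx, cy, w, h, rotation) derived from the 4-corner polygons. A rough sketch of what xyxyxyxy2xywhr computes, using cv2.minAreaRect; the real helper lives in ultralytics.utils.ops, and the angle unit (radians) is an assumption of this sketch:

    import cv2
    import numpy as np

    def xyxyxyxy2xywhr_sketch(corners):
        """(N, 4, 2) polygon corners -> (N, 5) rotated boxes [cx, cy, w, h, rotation]."""
        boxes = []
        for pts in corners:
            # minimum-area rotated rectangle enclosing the 4 corners
            (cx, cy), (w, h), angle = cv2.minAreaRect(pts.astype(np.float32))
            boxes.append([cx, cy, w, h, np.radians(angle)])  # angle unit assumed
        return np.asarray(boxes, dtype=np.float32)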
ultralytics/data/build.py

@@ -89,8 +89,7 @@ def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, str
         stride=int(stride),
         pad=0.0 if mode == 'train' else 0.5,
         prefix=colorstr(f'{mode}: '),
-        use_segments=cfg.task == 'segment',
-        use_keypoints=cfg.task == 'pose',
+        task=cfg.task,
         classes=cfg.classes,
         data=data,
         fraction=cfg.fraction if mode == 'train' else 1.0)
ultralytics/data/dataset.py

@@ -11,6 +11,7 @@ import torchvision
 from PIL import Image

 from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
+from ultralytics.utils.ops import resample_segments

 from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
 from .base import BaseDataset
@@ -26,17 +27,17 @@ class YOLODataset(BaseDataset):

     Args:
         data (dict, optional): A dataset YAML dictionary. Defaults to None.
-        use_segments (bool, optional): If True, segmentation masks are used as labels. Defaults to False.
-        use_keypoints (bool, optional): If True, keypoints are used as labels. Defaults to False.
+        task (str): An explicit arg to point current task, Defaults to 'detect'.

     Returns:
         (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
     """

-    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+    def __init__(self, *args, data=None, task='detect', **kwargs):
         """Initializes the YOLODataset with optional configurations for segments and keypoints."""
-        self.use_segments = use_segments
-        self.use_keypoints = use_keypoints
+        self.use_segments = task == 'segment'
+        self.use_keypoints = task == 'pose'
+        self.use_obb = task == 'obb'
         self.data = data
         assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
         super().__init__(*args, **kwargs)
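The task string now determines the per-task flags at construction time, so callers pass one argument instead of two booleans. A hypothetical construction (the img_path and data_dict values are placeholders):

    # Hypothetical usage; img_path and data_dict are placeholders
    dataset = YOLODataset(img_path='datasets/DOTAv2/images/train', data=data_dict, task='obb')
    assert dataset.use_obb and not (dataset.use_segments or dataset.use_keypoints)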
@@ -148,6 +149,7 @@ class YOLODataset(BaseDataset):
             normalize=True,
             return_mask=self.use_segments,
             return_keypoint=self.use_keypoints,
+            return_obb=self.use_obb,
             batch_idx=True,
             mask_ratio=hyp.mask_ratio,
             mask_overlap=hyp.overlap_mask))
@@ -165,10 +167,19 @@ class YOLODataset(BaseDataset):
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
         # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
-        segments = label.pop('segments')
+        segments = label.pop('segments', [])
         keypoints = label.pop('keypoints', None)
         bbox_format = label.pop('bbox_format')
         normalized = label.pop('normalized')
+
+        # NOTE: do NOT resample oriented boxes
+        segment_resamples = 100 if self.use_obb else 1000
+        if len(segments) > 0:
+            # list[np.array(1000, 2)] * num_samples
+            # (N, 1000, 2)
+            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
+        else:
+            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
         label['instances'] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
         return label
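resample_segments interpolates each polygon to a fixed point count so the list stacks into one array; OBB labels use a smaller count (100) since the oriented box is recovered from the corners rather than from a dense outline. A small illustration (the square polygon is made up):

    import numpy as np
    from ultralytics.utils.ops import resample_segments

    square = [np.array([[0, 0], [0, 10], [10, 10], [10, 0]], dtype=np.float32)]
    resampled = resample_segments(square, n=100)  # interpolate along the outline
    print(resampled[0].shape)  # (100, 2) -- fixed length, so np.stack works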
@@ -182,7 +193,7 @@ class YOLODataset(BaseDataset):
             value = values[i]
             if k == 'img':
                 value = torch.stack(value, 0)
-            if k in ['masks', 'keypoints', 'bboxes', 'cls']:
+            if k in ['masks', 'keypoints', 'bboxes', 'cls', 'segments', 'obb']:
                 value = torch.cat(value, 0)
             new_batch[k] = value
         new_batch['batch_idx'] = list(new_batch['batch_idx'])
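Because per-image labels are concatenated rather than stacked, batch_idx is what maps each row back to its source image. A sketch of the bookkeeping, mirroring the add-image-index step that follows this hunk in collate_fn (not shown above):

    import torch

    # Two images contributing 2 and 1 boxes, as emitted by Format with batch_idx=True
    batch_idx = [torch.zeros(2), torch.zeros(1)]
    for i, b in enumerate(batch_idx):
        b += i                          # tag each row with its image index
    print(torch.cat(batch_idx, 0))      # tensor([0., 0., 1.])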
ultralytics/data/split_dota.py (new file, 288 lines)
@@ -0,0 +1,288 @@
import itertools
import os
from glob import glob
from math import ceil
from pathlib import Path

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

from ultralytics.data.utils import exif_size, img2label_paths
from ultralytics.utils.checks import check_requirements

check_requirements('shapely')
from shapely.geometry import Polygon


def bbox_iof(polygon1, bbox2, eps=1e-6):
    """
    Calculate iofs between bbox1 and bbox2.

    Args:
        polygon1 (np.ndarray): Polygon coordinates, (n, 8).
        bbox2 (np.ndarray): Bounding boxes, (n ,4).
    """
    polygon1 = polygon1.reshape(-1, 4, 2)
    lt_point = np.min(polygon1, axis=-2)
    rb_point = np.max(polygon1, axis=-2)
    bbox1 = np.concatenate([lt_point, rb_point], axis=-1)

    lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
    rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    l, t, r, b = (bbox2[..., i] for i in range(4))
    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)

    sg_polys1 = [Polygon(p) for p in polygon1]
    sg_polys2 = [Polygon(p) for p in polygon2]
    overlaps = np.zeros(h_overlaps.shape)
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
    unions = unions[..., None]

    unions = np.clip(unions, eps, np.inf)
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs


def load_yolo_dota(data_root, split='train'):
    """Load DOTA dataset.

    Args:
        data_root (str): Data root.
        split (str): The split data set, could be train or val.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    assert split in ['train', 'val']
    im_dir = os.path.join(data_root, f'images/{split}')
    assert Path(im_dir).exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(os.path.join(data_root, f'images/{split}/*'))
    lb_files = img2label_paths(im_files)
    annos = []
    for im_file, lb_file in zip(im_files, lb_files):
        w, h = exif_size(Image.open(im_file))
        with open(lb_file) as f:
            lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
            lb = np.array(lb, dtype=np.float32)
        annos.append(dict(ori_size=(h, w), label=lb, filepath=im_file))
    return annos


def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
    """
    Get the coordinates of windows.

    Args:
        im_size (tuple): Original image size, (h, w).
        crop_sizes (List(int)): Crop size of windows.
        gaps (List(int)): Gap between each crops.
        im_rate_thr (float): Threshold of windows areas divided by image ares.
    """
    h, w = im_size
    windows = []
    for crop_size, gap in zip(crop_sizes, gaps):
        assert crop_size > gap, f'invaild crop_size gap pair [{crop_size} {gap}]'
        step = crop_size - gap

        xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
        xs = [step * i for i in range(xn)]
        if len(xs) > 1 and xs[-1] + crop_size > w:
            xs[-1] = w - crop_size

        yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
        ys = [step * i for i in range(yn)]
        if len(ys) > 1 and ys[-1] + crop_size > h:
            ys[-1] = h - crop_size

        start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
        stop = start + crop_size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    im_in_wins = windows.copy()
    im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
    im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
    im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
    im_rates = im_areas / win_areas
    if not (im_rates > im_rate_thr).any():
        max_rate = im_rates.max()
        im_rates[abs(im_rates - max_rate) < eps] = 1
    return windows[im_rates > im_rate_thr]


def get_window_obj(anno, windows, iof_thr=0.7):
    """Get objects for each window."""
    h, w = anno['ori_size']
    label = anno['label']
    if len(label):
        label[:, 1::2] *= w
        label[:, 2::2] *= h
        iofs = bbox_iof(label[:, 1:], windows)
        # unnormalized and misaligned coordinates
        window_anns = [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]
    else:
        window_anns = [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]
    return window_anns


def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
    """Crop images and save new labels.

    Args:
        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
        windows (list): A list of windows coordinates.
        window_objs (list): A list of labels inside each window.
        im_dir (str): The output directory path of images.
        lb_dir (str): The output directory path of labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    im = cv2.imread(anno['filepath'])
    name = Path(anno['filepath']).stem
    for i, window in enumerate(windows):
        x_start, y_start, x_stop, y_stop = window.tolist()
        new_name = name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start)
        patch_im = im[y_start:y_stop, x_start:x_stop]
        ph, pw = patch_im.shape[:2]

        cv2.imwrite(os.path.join(im_dir, f'{new_name}.jpg'), patch_im)
        label = window_objs[i]
        if len(label) == 0:
            continue
        label[:, 1::2] -= x_start
        label[:, 2::2] -= y_start
        label[:, 1::2] /= pw
        label[:, 2::2] /= ph

        with open(os.path.join(lb_dir, f'{new_name}.txt'), 'w') as f:
            for lb in label:
                formatted_coords = ['{:.6g}'.format(coord) for coord in lb[1:]]
                f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024], gaps=[200]):
    """
    Split both images and labels.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - split
                - labels
                    - split
        and the output directory structure is:
            - save_dir
                - images
                    - split
                - labels
                    - split
    """
    im_dir = Path(save_dir) / 'images' / split
    im_dir.mkdir(parents=True, exist_ok=True)
    lb_dir = Path(save_dir) / 'labels' / split
    lb_dir.mkdir(parents=True, exist_ok=True)

    annos = load_yolo_dota(data_root, split=split)
    for anno in tqdm(annos, total=len(annos), desc=split):
        windows = get_windows(anno['ori_size'], crop_sizes, gaps)
        window_objs = get_window_obj(anno, windows)
        crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split train and val set of DOTA.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
        and the output directory structure is:
            - save_dir
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    for split in ['train', 'val']:
        split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split test set of DOTA, labels are not included within this set.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - test
        and the output directory structure is:
            - save_dir
                - images
                    - test
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    save_dir = Path(save_dir) / 'images' / 'test'
    save_dir.mkdir(parents=True, exist_ok=True)

    im_dir = Path(os.path.join(data_root, 'images/test'))
    assert im_dir.exists(), f"Can't find {str(im_dir)}, please check your data root."
    im_files = glob(str(im_dir / '*'))
    for im_file in tqdm(im_files, total=len(im_files), desc='test'):
        w, h = exif_size(Image.open(im_file))
        windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
        im = cv2.imread(im_file)
        name = Path(im_file).stem
        for window in windows:
            x_start, y_start, x_stop, y_stop = window.tolist()
            new_name = (name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start))
            patch_im = im[y_start:y_stop, x_start:x_stop]
            cv2.imwrite(os.path.join(str(save_dir), f'{new_name}.jpg'), patch_im)


if __name__ == '__main__':
    split_trainval(
        data_root='DOTAv2',
        save_dir='DOTAv2-split',
    )
    split_test(
        data_root='DOTAv2',
        save_dir='DOTAv2-split',
    )
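To make the sliding-window arithmetic in get_windows concrete, here is a worked trace of one axis, using the code above; the 1500-pixel width is an arbitrary example:

    from math import ceil

    w, crop_size, gap = 1500, 1024, 200
    step = crop_size - gap                                           # 824
    xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)   # 2 columns
    xs = [step * i for i in range(xn)]                               # [0, 824]
    if len(xs) > 1 and xs[-1] + crop_size > w:
        xs[-1] = w - crop_size                                       # last column pulled back inside
    print(xs)  # [0, 476] -> windows spanning [0, 1024) and [476, 1500)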
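Note that bbox_iof computes intersection over foreground: the intersection area is divided by the polygon's own area rather than the union, which is why a half-covered object scores 0.5 against a window. A quick check of that semantics (the values are made up):

    import numpy as np

    poly = np.array([[0, 0, 10, 0, 10, 10, 0, 10]], dtype=np.float32)  # 10x10 square, (n, 8)
    window_full = np.array([[0, 0, 20, 20]], dtype=np.float32)         # covers it entirely
    window_half = np.array([[0, 0, 5, 10]], dtype=np.float32)          # covers half of it
    print(bbox_iof(poly, window_full))  # [[1.]]
    print(bbox_iof(poly, window_half))  # [[0.5]]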
ultralytics/data/utils.py

@@ -516,10 +516,7 @@ class HUBDatasetStats:
         else:
             from ultralytics.data import YOLODataset

-            dataset = YOLODataset(img_path=self.data[split],
-                                  data=self.data,
-                                  use_segments=self.task == 'segment',
-                                  use_keypoints=self.task == 'pose')
+            dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
             x = np.array([
                 np.bincount(label['cls'].astype(int).flatten(), minlength=self.data['nc'])
                 for label in TQDM(dataset.labels, total=len(dataset), desc='Statistics')])  # shape(128x80)