ultralytics 8.0.235 YOLOv8 OBB train, val, predict and export (#4499)

Co-authored-by: Yash Khurana <ykhurana6@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Swamita Gupta <swamita2001@gmail.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Laughing-q <1182102784@qq.com>

parent f702b34a50
commit 072291bc78

52 changed files with 2090 additions and 524 deletions
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

__version__ = '8.0.234'
__version__ = '8.0.235'

from ultralytics.models import RTDETR, SAM, YOLO
from ultralytics.models.fastsam import FastSAM
@@ -13,18 +13,25 @@ from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CF

# Define valid tasks and modes
MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
TASKS = 'detect', 'segment', 'classify', 'pose'
TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
TASKS = 'detect', 'segment', 'classify', 'pose', 'obb'
TASK2DATA = {
    'detect': 'coco8.yaml',
    'segment': 'coco8-seg.yaml',
    'classify': 'imagenet10',
    'pose': 'coco8-pose.yaml',
    'obb': 'dota8-obb.yaml'}  # not implemented yet
TASK2MODEL = {
    'detect': 'yolov8n.pt',
    'segment': 'yolov8n-seg.pt',
    'classify': 'yolov8n-cls.pt',
    'pose': 'yolov8n-pose.pt'}
    'pose': 'yolov8n-pose.pt',
    'obb': 'yolov8n-obb.pt'}
TASK2METRIC = {
    'detect': 'metrics/mAP50-95(B)',
    'segment': 'metrics/mAP50-95(M)',
    'classify': 'metrics/accuracy_top1',
    'pose': 'metrics/mAP50-95(P)'}
    'pose': 'metrics/mAP50-95(P)',
    'obb': 'metrics/mAP50-95(OBB)'}

CLI_HELP_MSG = \
    f"""
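A minimal, illustrative sketch (not part of the diff) of how the TASK2* maps above resolve per-task defaults for the new 'obb' task; it assumes ultralytics 8.0.235 is installed and that the maps remain importable from `ultralytics.cfg`:

```python
# Sketch: resolve the default data, model and metric key for the 'obb' task.
from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASK2METRIC

task = 'obb'
data = TASK2DATA[task]      # 'dota8-obb.yaml' (marked "not implemented yet" above)
model = TASK2MODEL[task]    # 'yolov8n-obb.pt'
metric = TASK2METRIC[task]  # 'metrics/mAP50-95(OBB)'
print(task, data, model, metric)
```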
@@ -72,7 +79,7 @@ CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic'
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
                 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
                 'save_frames', 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks',
                 'show_boxes', 'keras', 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
                 'show_boxes', 'keras', 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile', 'multi_scale')


def cfg2dict(cfg):
@@ -1,5 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
# parent
# ├── ultralytics
@@ -7,12 +7,12 @@
# └── dota2 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv2  # dataset root dir
path: ../datasets/DOTAv1.5  # dataset root dir
train: images/train  # train images (relative to 'path') 1411 images
val: images/val  # val images (relative to 'path') 458 images
test: images/test  # test images (optional) 937 images

# Classes for DOTA 2.0
# Classes for DOTA 1.5
names:
  0: plane
  1: ship
@@ -30,8 +30,6 @@ names:
  13: soccer ball field
  14: swimming pool
  15: container crane
  16: airport
  17: helipad

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip
34 ultralytics/cfg/datasets/DOTAv1.yaml Normal file

@@ -0,0 +1,34 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota2 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1  # dataset root dir
train: images/train  # train images (relative to 'path') 1411 images
val: images/val  # val images (relative to 'path') 458 images
test: images/test  # test images (optional) 937 images

# Classes for DOTA 1.0
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip
@@ -34,6 +34,7 @@ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, Fal
fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
profile: False  # (bool) profile ONNX and TensorRT speeds during training for loggers
freeze: None  # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False  # (bool) Whether to use multi-scale during training
# Segmentation
overlap_mask: True  # (bool) masks should overlap during training (segment train only)
mask_ratio: 4  # (int) mask downsample ratio (segment train only)
46 ultralytics/cfg/models/v8/yolov8-obb.yaml Normal file

@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80  # number of classes
scales:  # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768]  # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512]  # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512]  # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]]  # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)

  - [[15, 18, 21], 1, OBB, [nc, 1]]  # OBB(P3, P4, P5)
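A hedged sketch of how the OBB architecture above is expected to be driven through the YOLO API once this release is installed; the `imgsz=1024` value and the local image/weight files are assumptions, while `DOTAv1.yaml` is the dataset file added earlier in this commit:

```python
# Illustrative only: build, train and validate the OBB model defined above.
from ultralytics import YOLO

model = YOLO('yolov8n-obb.yaml')                        # build the architecture from the YAML above
# model = YOLO('yolov8n-obb.pt')                        # or start from pretrained OBB weights
model.train(data='DOTAv1.yaml', epochs=3, imgsz=1024)   # DOTA-style dataset YAML from this commit
metrics = model.val()                                   # reported under metrics/mAP50-95(OBB)
```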
@@ -13,7 +13,7 @@ from ultralytics.utils import LOGGER, colorstr
from ultralytics.utils.checks import check_version
from ultralytics.utils.instance import Instances
from ultralytics.utils.metrics import bbox_ioa
from ultralytics.utils.ops import segment2box
from ultralytics.utils.ops import segment2box, xyxyxyxy2xywhr
from ultralytics.utils.torch_utils import TORCHVISION_0_10, TORCHVISION_0_11, TORCHVISION_0_13

from .utils import polygons2masks, polygons2masks_overlap
@@ -485,6 +485,8 @@ class RandomPerspective:
        xy = xy[:, :2] / xy[:, 2:3]
        segments = xy.reshape(n, -1, 2)
        bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0)
        segments[..., 0] = segments[..., 0].clip(bboxes[:, 0:1], bboxes[:, 2:3])
        segments[..., 1] = segments[..., 1].clip(bboxes[:, 1:2], bboxes[:, 3:4])
        return bboxes, segments

    def apply_keypoints(self, keypoints, M):
@@ -891,6 +893,7 @@ class Format:
                 normalize=True,
                 return_mask=False,
                 return_keypoint=False,
                 return_obb=False,
                 mask_ratio=4,
                 mask_overlap=True,
                 batch_idx=True):
@@ -899,6 +902,7 @@ class Format:
        self.normalize = normalize
        self.return_mask = return_mask  # set False when training detection only
        self.return_keypoint = return_keypoint
        self.return_obb = return_obb
        self.mask_ratio = mask_ratio
        self.mask_overlap = mask_overlap
        self.batch_idx = batch_idx  # keep the batch indexes
@@ -928,6 +932,9 @@ class Format:
        labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
        if self.return_keypoint:
            labels['keypoints'] = torch.from_numpy(instances.keypoints)
        if self.return_obb:
            labels['bboxes'] = xyxyxyxy2xywhr(torch.from_numpy(instances.segments)) if len(
                instances.segments) else torch.zeros((0, 5))
        # Then we can use collate_fn
        if self.batch_idx:
            labels['batch_idx'] = torch.zeros(nl)
@@ -89,8 +89,7 @@ def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, str
        stride=int(stride),
        pad=0.0 if mode == 'train' else 0.5,
        prefix=colorstr(f'{mode}: '),
        use_segments=cfg.task == 'segment',
        use_keypoints=cfg.task == 'pose',
        task=cfg.task,
        classes=cfg.classes,
        data=data,
        fraction=cfg.fraction if mode == 'train' else 1.0)
@@ -11,6 +11,7 @@ import torchvision
from PIL import Image

from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
from ultralytics.utils.ops import resample_segments

from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
from .base import BaseDataset
@@ -26,17 +27,17 @@ class YOLODataset(BaseDataset):

    Args:
        data (dict, optional): A dataset YAML dictionary. Defaults to None.
        use_segments (bool, optional): If True, segmentation masks are used as labels. Defaults to False.
        use_keypoints (bool, optional): If True, keypoints are used as labels. Defaults to False.
        task (str): An explicit arg to point current task, Defaults to 'detect'.

    Returns:
        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
    """

    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
    def __init__(self, *args, data=None, task='detect', **kwargs):
        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
        self.use_segments = use_segments
        self.use_keypoints = use_keypoints
        self.use_segments = task == 'segment'
        self.use_keypoints = task == 'pose'
        self.use_obb = task == 'obb'
        self.data = data
        assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
        super().__init__(*args, **kwargs)
@@ -148,6 +149,7 @@ class YOLODataset(BaseDataset):
                   normalize=True,
                   return_mask=self.use_segments,
                   return_keypoint=self.use_keypoints,
                   return_obb=self.use_obb,
                   batch_idx=True,
                   mask_ratio=hyp.mask_ratio,
                   mask_overlap=hyp.overlap_mask))
@@ -165,10 +167,19 @@ class YOLODataset(BaseDataset):
        # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
        # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
        bboxes = label.pop('bboxes')
        segments = label.pop('segments')
        segments = label.pop('segments', [])
        keypoints = label.pop('keypoints', None)
        bbox_format = label.pop('bbox_format')
        normalized = label.pop('normalized')

        # NOTE: do NOT resample oriented boxes
        segment_resamples = 100 if self.use_obb else 1000
        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            # (N, 1000, 2)
            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
        else:
            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
        label['instances'] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label
@@ -182,7 +193,7 @@ class YOLODataset(BaseDataset):
            value = values[i]
            if k == 'img':
                value = torch.stack(value, 0)
            if k in ['masks', 'keypoints', 'bboxes', 'cls']:
            if k in ['masks', 'keypoints', 'bboxes', 'cls', 'segments', 'obb']:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch['batch_idx'] = list(new_batch['batch_idx'])
288 ultralytics/data/split_dota.py Normal file

@@ -0,0 +1,288 @@
import itertools
import os
from glob import glob
from math import ceil
from pathlib import Path

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

from ultralytics.data.utils import exif_size, img2label_paths
from ultralytics.utils.checks import check_requirements

check_requirements('shapely')
from shapely.geometry import Polygon


def bbox_iof(polygon1, bbox2, eps=1e-6):
    """
    Calculate iofs between bbox1 and bbox2.

    Args:
        polygon1 (np.ndarray): Polygon coordinates, (n, 8).
        bbox2 (np.ndarray): Bounding boxes, (n, 4).
    """
    polygon1 = polygon1.reshape(-1, 4, 2)
    lt_point = np.min(polygon1, axis=-2)
    rb_point = np.max(polygon1, axis=-2)
    bbox1 = np.concatenate([lt_point, rb_point], axis=-1)

    lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
    rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    l, t, r, b = (bbox2[..., i] for i in range(4))
    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)

    sg_polys1 = [Polygon(p) for p in polygon1]
    sg_polys2 = [Polygon(p) for p in polygon2]
    overlaps = np.zeros(h_overlaps.shape)
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
    unions = unions[..., None]

    unions = np.clip(unions, eps, np.inf)
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs


def load_yolo_dota(data_root, split='train'):
    """Load DOTA dataset.

    Args:
        data_root (str): Data root.
        split (str): The split data set, could be train or val.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    assert split in ['train', 'val']
    im_dir = os.path.join(data_root, f'images/{split}')
    assert Path(im_dir).exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(os.path.join(data_root, f'images/{split}/*'))
    lb_files = img2label_paths(im_files)
    annos = []
    for im_file, lb_file in zip(im_files, lb_files):
        w, h = exif_size(Image.open(im_file))
        with open(lb_file) as f:
            lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
            lb = np.array(lb, dtype=np.float32)
        annos.append(dict(ori_size=(h, w), label=lb, filepath=im_file))
    return annos


def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
    """
    Get the coordinates of windows.

    Args:
        im_size (tuple): Original image size, (h, w).
        crop_sizes (List(int)): Crop size of windows.
        gaps (List(int)): Gap between each crops.
        im_rate_thr (float): Threshold of windows areas divided by image ares.
    """
    h, w = im_size
    windows = []
    for crop_size, gap in zip(crop_sizes, gaps):
        assert crop_size > gap, f'invaild crop_size gap pair [{crop_size} {gap}]'
        step = crop_size - gap

        xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
        xs = [step * i for i in range(xn)]
        if len(xs) > 1 and xs[-1] + crop_size > w:
            xs[-1] = w - crop_size

        yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
        ys = [step * i for i in range(yn)]
        if len(ys) > 1 and ys[-1] + crop_size > h:
            ys[-1] = h - crop_size

        start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
        stop = start + crop_size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    im_in_wins = windows.copy()
    im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
    im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
    im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
    im_rates = im_areas / win_areas
    if not (im_rates > im_rate_thr).any():
        max_rate = im_rates.max()
        im_rates[abs(im_rates - max_rate) < eps] = 1
    return windows[im_rates > im_rate_thr]


def get_window_obj(anno, windows, iof_thr=0.7):
    """Get objects for each window."""
    h, w = anno['ori_size']
    label = anno['label']
    if len(label):
        label[:, 1::2] *= w
        label[:, 2::2] *= h
        iofs = bbox_iof(label[:, 1:], windows)
        # unnormalized and misaligned coordinates
        window_anns = [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]
    else:
        window_anns = [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]
    return window_anns


def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
    """Crop images and save new labels.

    Args:
        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
        windows (list): A list of windows coordinates.
        window_objs (list): A list of labels inside each window.
        im_dir (str): The output directory path of images.
        lb_dir (str): The output directory path of labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    im = cv2.imread(anno['filepath'])
    name = Path(anno['filepath']).stem
    for i, window in enumerate(windows):
        x_start, y_start, x_stop, y_stop = window.tolist()
        new_name = name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start)
        patch_im = im[y_start:y_stop, x_start:x_stop]
        ph, pw = patch_im.shape[:2]

        cv2.imwrite(os.path.join(im_dir, f'{new_name}.jpg'), patch_im)
        label = window_objs[i]
        if len(label) == 0:
            continue
        label[:, 1::2] -= x_start
        label[:, 2::2] -= y_start
        label[:, 1::2] /= pw
        label[:, 2::2] /= ph

        with open(os.path.join(lb_dir, f'{new_name}.txt'), 'w') as f:
            for lb in label:
                formatted_coords = ['{:.6g}'.format(coord) for coord in lb[1:]]
                f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


def split_images_and_labels(data_root, save_dir, split='train', crop_sizes=[1024], gaps=[200]):
    """
    Split both images and labels.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - split
                - labels
                    - split
        and the output directory structure is:
            - save_dir
                - images
                    - split
                - labels
                    - split
    """
    im_dir = Path(save_dir) / 'images' / split
    im_dir.mkdir(parents=True, exist_ok=True)
    lb_dir = Path(save_dir) / 'labels' / split
    lb_dir.mkdir(parents=True, exist_ok=True)

    annos = load_yolo_dota(data_root, split=split)
    for anno in tqdm(annos, total=len(annos), desc=split):
        windows = get_windows(anno['ori_size'], crop_sizes, gaps)
        window_objs = get_window_obj(anno, windows)
        crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split train and val set of DOTA.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
        and the output directory structure is:
            - save_dir
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    for split in ['train', 'val']:
        split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split test set of DOTA, labels are not included within this set.

    NOTES:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - test
        and the output directory structure is:
            - save_dir
                - images
                    - test
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    save_dir = Path(save_dir) / 'images' / 'test'
    save_dir.mkdir(parents=True, exist_ok=True)

    im_dir = Path(os.path.join(data_root, 'images/test'))
    assert im_dir.exists(), f"Can't find {str(im_dir)}, please check your data root."
    im_files = glob(str(im_dir / '*'))
    for im_file in tqdm(im_files, total=len(im_files), desc='test'):
        w, h = exif_size(Image.open(im_file))
        windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
        im = cv2.imread(im_file)
        name = Path(im_file).stem
        for window in windows:
            x_start, y_start, x_stop, y_stop = window.tolist()
            new_name = (name + '__' + str(x_stop - x_start) + '__' + str(x_start) + '___' + str(y_start))
            patch_im = im[y_start:y_stop, x_start:x_stop]
            cv2.imwrite(os.path.join(str(save_dir), f'{new_name}.jpg'), patch_im)


if __name__ == '__main__':
    split_trainval(
        data_root='DOTAv2',
        save_dir='DOTAv2-split',
    )
    split_test(
        data_root='DOTAv2',
        save_dir='DOTAv2-split',
    )
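A worked, illustrative example of the sliding-window arithmetic in `get_windows` above; the image size is an assumption chosen to show a multi-window case:

```python
# Sketch: window grid for a 1500x1300 (h, w) image with crop_size=1024, gap=200.
from math import ceil

h, w = 1500, 1300
crop_size, gap = 1024, 200
step = crop_size - gap                                           # 824
xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)   # ceil(276/824 + 1) = 2
yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)   # ceil(476/824 + 1) = 2
xs = [step * i for i in range(xn)]   # [0, 824]; last start is clipped to w - crop_size = 276
ys = [step * i for i in range(yn)]   # [0, 824]; last start is clipped to h - crop_size = 476
print(xn * yn, 'windows of', crop_size, 'x', crop_size)          # 4 overlapping 1024x1024 patches
```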
@@ -516,10 +516,7 @@ class HUBDatasetStats:
        else:
            from ultralytics.data import YOLODataset

            dataset = YOLODataset(img_path=self.data[split],
                                  data=self.data,
                                  use_segments=self.task == 'segment',
                                  use_keypoints=self.task == 'pose')
            dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
            x = np.array([
                np.bincount(label['cls'].astype(int).flatten(), minlength=self.data['nc'])
                for label in TQDM(dataset.labels, total=len(dataset), desc='Statistics')])  # shape(128x80)
@@ -89,7 +89,7 @@ class Results(SimpleClass):
        _keys (tuple): A tuple of attribute names for non-empty attributes.
    """

    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None:
    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None) -> None:
        """Initialize the Results class."""
        self.orig_img = orig_img
        self.orig_shape = orig_img.shape[:2]
@@ -97,11 +97,12 @@ class Results(SimpleClass):
        self.masks = Masks(masks, self.orig_shape) if masks is not None else None  # native size or imgsz masks
        self.probs = Probs(probs) if probs is not None else None
        self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
        self.obb = OBB(obb, self.orig_shape) if obb is not None else None
        self.speed = {'preprocess': None, 'inference': None, 'postprocess': None}  # milliseconds per image
        self.names = names
        self.path = path
        self.save_dir = None
        self._keys = 'boxes', 'masks', 'probs', 'keypoints'
        self._keys = 'boxes', 'masks', 'probs', 'keypoints', 'obb'

    def __getitem__(self, idx):
        """Return a Results object for the specified index."""
@@ -218,7 +219,8 @@ class Results(SimpleClass):
            img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()

        names = self.names
        pred_boxes, show_boxes = self.boxes, boxes
        is_obb = self.obb is not None
        pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
        pred_masks, show_masks = self.masks, masks
        pred_probs, show_probs = self.probs, probs
        annotator = Annotator(
@@ -239,12 +241,13 @@ class Results(SimpleClass):
            annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)

        # Plot Detect results
        if pred_boxes and show_boxes:
        if pred_boxes is not None and show_boxes:
            for d in reversed(pred_boxes):
                c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
                name = ('' if id is None else f'id:{id} ') + names[c]
                label = (f'{name} {conf:.2f}' if conf else name) if labels else None
                annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
                box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
                annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)

        # Plot Classify results
        if pred_probs is not None and show_probs:
@@ -390,7 +393,7 @@ class Boxes(BaseTensor):
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (6, 7), f'expected `n` in [6, 7], but got {n}'  # xyxy, track_id, conf, cls
        assert n in (6, 7), f'expected 6 or 7 values but got {n}'  # xyxy, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 7
        self.orig_shape = orig_shape
@@ -571,3 +574,77 @@ class Probs(BaseTensor):
    def top5conf(self):
        """Return the confidences of top 5."""
        return self.data[self.top5]


class OBB(BaseTensor):
    """
    A class for storing and manipulating Oriented Bounding Boxes (OBB).

    Args:
        boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
            with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
            If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
        orig_shape (tuple): Original image size, in the format (height, width).

    Attributes:
        xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
        conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
        cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
        id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
        xyxyxyxy (torch.Tensor | numpy.ndarray): The boxes in xyxyxyxy format normalized by original image size.
        data (torch.Tensor): The raw OBB tensor (alias for `boxes`).

    Methods:
        cpu(): Move the object to CPU memory.
        numpy(): Convert the object to a numpy array.
        cuda(): Move the object to CUDA memory.
        to(*args, **kwargs): Move the object to the specified device.
    """

    def __init__(self, boxes, orig_shape) -> None:
        """Initialize the Boxes class."""
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (7, 8), f'expected 7 or 8 values but got {n}'  # xywh, rotation, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 8
        self.orig_shape = orig_shape

    @property
    def xywhr(self):
        """Return the rotated boxes in xywhr format."""
        return self.data[:, :5]

    @property
    def conf(self):
        """Return the confidence values of the boxes."""
        return self.data[:, -2]

    @property
    def cls(self):
        """Return the class values of the boxes."""
        return self.data[:, -1]

    @property
    def id(self):
        """Return the track IDs of the boxes (if available)."""
        return self.data[:, -3] if self.is_track else None

    @property
    @lru_cache(maxsize=2)
    def xyxyxyxy(self):
        """Return the boxes in xyxyxyxy format, (N, 4, 2)."""
        return ops.xywhr2xyxyxyxy(self.xywhr)

    @property
    @lru_cache(maxsize=2)
    def xyxy(self):
        """Return the horizontal boxes in xyxy format, (N, 4)."""
        # This way to fit both torch and numpy version
        x1 = self.xyxyxyxy[..., 0].min(1).values
        x2 = self.xyxyxyxy[..., 0].max(1).values
        y1 = self.xyxyxyxy[..., 1].min(1).values
        y2 = self.xyxyxyxy[..., 1].max(1).values
        xyxy = [x1, y1, x2, y2]
        return np.stack(xyxy, axis=-1) if isinstance(self.data, np.ndarray) else torch.stack(xyxy, dim=-1)
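An illustrative sketch of reading the new OBB results container after inference; the image file and pretrained weights are assumptions, and the attribute shapes follow the class definition above:

```python
# Sketch: inspect the OBB container on a prediction result.
from ultralytics import YOLO

res = YOLO('yolov8n-obb.pt')('aerial.jpg')[0]   # hypothetical weights and image
if res.obb is not None:
    print(res.obb.xywhr.shape)      # (N, 5): x_center, y_center, width, height, rotation
    print(res.obb.xyxyxyxy.shape)   # (N, 4, 2): the four corner points of each rotated box
    print(res.obb.xyxy.shape)       # (N, 4): axis-aligned hull of each rotated box
    print(res.obb.conf, res.obb.cls)
```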
@@ -249,6 +249,7 @@ class BaseTrainer:
        # Check imgsz
        gs = max(int(self.model.stride.max() if hasattr(self.model, 'stride') else 32), 32)  # grid size (max stride)
        self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
        self.stride = gs  # for multi-scale training

        # Batch size
        if self.batch_size == -1 and RANK == -1:  # single-GPU only, estimate best batch size
@@ -258,7 +259,11 @@ class BaseTrainer:
        batch_size = self.batch_size // max(world_size, 1)
        self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode='train')
        if RANK in (-1, 0):
            self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val')
            # NOTE: When training DOTA dataset, double batch size could get OOM cause some images got more than 2000 objects.
            self.test_loader = self.get_dataloader(self.testset,
                                                   batch_size=batch_size if self.args.task == 'obb' else batch_size * 2,
                                                   rank=-1,
                                                   mode='val')
            self.validator = self.get_validator()
            metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from pathlib import Path

import torch

from ultralytics.data import YOLODataset
@@ -22,7 +20,7 @@ class RTDETRDataset(YOLODataset):

    def __init__(self, *args, data=None, **kwargs):
        """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
        super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
        super().__init__(*args, data=data, **kwargs)

    # NOTE: add stretch version load_image for RTDETR mosaic
    def load_image(self, i, rect_mode=False):
@@ -108,47 +106,22 @@ class RTDETRValidator(DetectionValidator):

        return outputs

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1
    def _prepare_batch(self, si, batch):
        idx = batch['batch_idx'] == si
        cls = batch['cls'][idx].squeeze(-1)
        bbox = batch['bboxes'][idx]
        ori_shape = batch['ori_shape'][si]
        imgsz = batch['img'].shape[2:]
        ratio_pad = batch['ratio_pad'][si]
        if len(cls):
            bbox = ops.xywh2xyxy(bbox)  # target boxes
            bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
            bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
        prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
        return prepared_batch

            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, *torch.zeros((2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            predn[..., [0, 2]] *= shape[1] / self.args.imgsz  # native-space pred
            predn[..., [1, 3]] *= shape[0] / self.args.imgsz  # native-space pred

            # Evaluate
            if nl:
                tbox = ops.xywh2xyxy(bbox)  # target boxes
                tbox[..., [0, 2]] *= shape[1]  # native-space pred
                tbox[..., [1, 3]] *= shape[0]  # native-space pred
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                # NOTE: To get correct metrics, the inputs of `_process_batch` should always be float32 type.
                correct_bboxes = self._process_batch(predn.float(), labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))  # (conf, pcls, tcls)

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            if self.args.save_txt:
                file = self.save_dir / 'labels' / f'{Path(batch["im_file"][si]).stem}.txt'
                self.save_one_txt(predn, self.args.save_conf, shape, file)
    def _prepare_pred(self, pred, pbatch):
        predn = pred.clone()
        predn[..., [0, 2]] *= pbatch['ori_shape'][1] / self.args.imgsz  # native-space pred
        predn[..., [1, 3]] *= pbatch['ori_shape'][0] / self.args.imgsz  # native-space pred
        return predn.float()
@@ -1,7 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from ultralytics.models.yolo import classify, detect, pose, segment
from ultralytics.models.yolo import classify, detect, obb, pose, segment

from .model import YOLO

__all__ = 'classify', 'segment', 'detect', 'pose', 'YOLO'
__all__ = 'classify', 'segment', 'detect', 'pose', 'obb', 'YOLO'
@@ -1,8 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import math
import random
from copy import copy

import numpy as np
import torch.nn as nn

from ultralytics.data import build_dataloader, build_yolo_dataset
from ultralytics.engine.trainer import BaseTrainer
@@ -54,6 +57,16 @@ class DetectionTrainer(BaseTrainer):
    def preprocess_batch(self, batch):
        """Preprocesses a batch of images by scaling and converting to float."""
        batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
        if self.args.multi_scale:
            imgs = batch['img']
            sz = (random.randrange(self.args.imgsz * 0.5, self.args.imgsz * 1.5 + self.stride) // self.stride *
                  self.stride)  # size
            sf = sz / max(imgs.shape[2:])  # scale factor
            if sf != 1:
                ns = [math.ceil(x * sf / self.stride) * self.stride
                      for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
            batch['img'] = imgs
        return batch

    def set_model_attributes(self):
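An illustrative sketch of the multi-scale size draw performed above; `imgsz=640` and `stride=32` are assumed values, and integer arithmetic is used here for clarity:

```python
# Sketch: a size is drawn from roughly [0.5*imgsz, 1.5*imgsz + stride) and snapped down
# to a multiple of the model stride, then the batch is resized by the resulting factor.
import random

imgsz, stride = 640, 32
sz = random.randrange(imgsz // 2, imgsz * 3 // 2 + stride) // stride * stride  # e.g. 352, 640 or 928
sf = sz / imgsz                                                                # scale factor for the batch
print(sz, sf)
```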
@@ -70,7 +70,7 @@ class DetectionValidator(BaseValidator):
        self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
        self.seen = 0
        self.jdict = []
        self.stats = []
        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[])

    def get_desc(self):
        """Return a formatted string summarizing class metrics of YOLO model."""
@@ -86,51 +86,68 @@ class DetectionValidator(BaseValidator):
                                       agnostic=self.args.single_cls,
                                       max_det=self.args.max_det)

    def _prepare_batch(self, si, batch):
        idx = batch['batch_idx'] == si
        cls = batch['cls'][idx].squeeze(-1)
        bbox = batch['bboxes'][idx]
        ori_shape = batch['ori_shape'][si]
        imgsz = batch['img'].shape[2:]
        ratio_pad = batch['ratio_pad'][si]
        if len(cls):
            bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
        prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
        return prepared_batch

    def _prepare_pred(self, pred, pbatch):
        predn = pred.clone()
        ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'],
                        ratio_pad=pbatch['ratio_pad'])  # native-space pred
        return predn

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            npr = len(pred)
            stat = dict(conf=torch.zeros(0, device=self.device),
                        pred_cls=torch.zeros(0, device=self.device),
                        tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
            nl = len(cls)
            stat['target_cls'] = cls
            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, *torch.zeros((2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    # TODO: obb has not supported confusion_matrix yet.
                    if self.args.plots and self.args.task != 'obb':
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            predn = self._prepare_pred(pred, pbatch)
            stat['conf'] = predn[:, 4]
            stat['pred_cls'] = predn[:, 5]

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))  # (conf, pcls, tcls)
                stat['tp'] = self._process_batch(predn, bbox, cls)
                # TODO: obb has not supported confusion_matrix yet.
                if self.args.plots and self.args.task != 'obb':
                    self.confusion_matrix.process_batch(predn, bbox, cls)
            for k in self.stats.keys():
                self.stats[k].append(stat[k])

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            if self.args.save_txt:
                file = self.save_dir / 'labels' / f'{Path(batch["im_file"][si]).stem}.txt'
                self.save_one_txt(predn, self.args.save_conf, shape, file)
                self.save_one_txt(predn, self.args.save_conf, pbatch['ori_shape'], file)

    def finalize_metrics(self, *args, **kwargs):
        """Set final values for metrics speed and confusion matrix."""
@@ -139,10 +156,11 @@ class DetectionValidator(BaseValidator):

    def get_stats(self):
        """Returns metrics statistics and results dictionary."""
        stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*self.stats)]  # to numpy
        if len(stats) and stats[0].any():
            self.metrics.process(*stats)
        self.nt_per_class = np.bincount(stats[-1].astype(int), minlength=self.nc)  # number of targets per class
        stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
        if len(stats) and stats['tp'].any():
            self.metrics.process(**stats)
        self.nt_per_class = np.bincount(stats['target_cls'].astype(int),
                                        minlength=self.nc)  # number of targets per class
        return self.metrics.results_dict

    def print_results(self):
@@ -165,7 +183,7 @@ class DetectionValidator(BaseValidator):
                                           normalize=normalize,
                                           on_plot=self.on_plot)

    def _process_batch(self, detections, labels):
    def _process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Return correct prediction matrix.

@@ -178,8 +196,8 @@ class DetectionValidator(BaseValidator):
        Returns:
            (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels.
        """
        iou = box_iou(labels[:, 1:], detections[:, :4])
        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
        iou = box_iou(gt_bboxes, detections[:, :4])
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def build_dataset(self, img_path, mode='val', batch=None):
        """
@@ -1,8 +1,8 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from ultralytics.engine.model import Model
from ultralytics.models import yolo  # noqa
from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel, SegmentationModel
from ultralytics.models import yolo
from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel


class YOLO(Model):
@@ -31,4 +31,9 @@ class YOLO(Model):
                'model': PoseModel,
                'trainer': yolo.pose.PoseTrainer,
                'validator': yolo.pose.PoseValidator,
                'predictor': yolo.pose.PosePredictor, }, }
                'predictor': yolo.pose.PosePredictor, },
            'obb': {
                'model': OBBModel,
                'trainer': yolo.obb.OBBTrainer,
                'validator': yolo.obb.OBBValidator,
                'predictor': yolo.obb.OBBPredictor, }, }
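A hedged sketch of what the `task_map` extension above enables: loading an OBB checkpoint routes the model to the new OBB trainer/validator/predictor classes. The weight file is an assumption, and `task_map` is accessed here only for illustration:

```python
# Sketch: the 'obb' entry in task_map routes YOLO to the OBB classes.
from ultralytics import YOLO

m = YOLO('yolov8n-obb.pt')                        # hypothetical pretrained OBB weights
print(m.task)                                     # expected: 'obb'
print(m.task_map['obb']['validator'].__name__)    # expected: 'OBBValidator'
```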
7 ultralytics/models/yolo/obb/__init__.py Normal file

@@ -0,0 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .predict import OBBPredictor
from .train import OBBTrainer
from .val import OBBValidator

__all__ = 'OBBPredictor', 'OBBTrainer', 'OBBValidator'
51 ultralytics/models/yolo/obb/predict.py Normal file

@@ -0,0 +1,51 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import torch

from ultralytics.engine.results import Results
from ultralytics.models.yolo.detect.predict import DetectionPredictor
from ultralytics.utils import DEFAULT_CFG, ops


class OBBPredictor(DetectionPredictor):
    """
    A class extending the DetectionPredictor class for prediction based on an Oriented Bounding Box (OBB) model.

    Example:
        ```python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.obb import OBBPredictor

        args = dict(model='yolov8n-obb.pt', source=ASSETS)
        predictor = OBBPredictor(overrides=args)
        predictor.predict_cli()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'obb'

    def postprocess(self, preds, img, orig_imgs):
        """Post-processes predictions and returns a list of Results objects."""
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det,
                                        nc=len(self.model.names),
                                        classes=self.args.classes,
                                        rotated=True)

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_imgs[i]
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape, xywh=True)
            img_path = self.batch[0][i]
            # xywh, r, conf, cls
            obb = torch.cat([pred[:, :4], pred[:, -1:], pred[:, 4:6]], dim=-1)
            results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb))
        return results
42 ultralytics/models/yolo/obb/train.py Normal file

@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from copy import copy

from ultralytics.models import yolo
from ultralytics.nn.tasks import OBBModel
from ultralytics.utils import DEFAULT_CFG, RANK


class OBBTrainer(yolo.detect.DetectionTrainer):
    """
    A class extending the DetectionTrainer class for training based on an Oriented Bounding Box (OBB) model.

    Example:
        ```python
        from ultralytics.models.yolo.obb import OBBTrainer

        args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3)
        trainer = OBBTrainer(overrides=args)
        trainer.train()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize a OBBTrainer object with given arguments."""
        if overrides is None:
            overrides = {}
        overrides['task'] = 'obb'
        super().__init__(cfg, overrides, _callbacks)

    def get_model(self, cfg=None, weights=None, verbose=True):
        """Return OBBModel initialized with specified config and weights."""
        model = OBBModel(cfg, ch=3, nc=self.data['nc'], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)

        return model

    def get_validator(self):
        """Return an instance of OBBValidator for validation of YOLO model."""
        self.loss_names = 'box_loss', 'cls_loss', 'dfl_loss'
        return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
187
ultralytics/models/yolo/obb/val.py
Normal file
187
ultralytics/models/yolo/obb/val.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
|
||||
from ultralytics.models.yolo.detect import DetectionValidator
|
||||
from ultralytics.utils import LOGGER, ops
|
||||
from ultralytics.utils.metrics import OBBMetrics, batch_probiou
|
||||
from ultralytics.utils.plotting import output_to_rotated_target, plot_images
|
||||
|
||||
|
||||
class OBBValidator(DetectionValidator):
|
||||
"""
|
||||
A class extending the DetectionValidator class for validation based on an Oriented Bounding Box (OBB) model.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from ultralytics.models.yolo.obb import OBBValidator
|
||||
|
||||
args = dict(model='yolov8n-obb.pt', data='coco8-seg.yaml')
|
||||
validator = OBBValidator(args=args)
|
||||
validator(model=args['model'])
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
|
||||
"""Initialize OBBValidator and set task to 'obb', metrics to OBBMetrics."""
|
||||
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
|
||||
self.args.task = 'obb'
|
||||
self.metrics = OBBMetrics(save_dir=self.save_dir, plot=True, on_plot=self.on_plot)
|
||||
|
||||
def init_metrics(self, model):
|
||||
"""Initialize evaluation metrics for YOLO."""
|
||||
super().init_metrics(model)
|
||||
val = self.data.get(self.args.split, '') # validation path
|
||||
self.is_dota = isinstance(val, str) and 'DOTA' in val # is COCO
|
||||
|
||||
def postprocess(self, preds):
|
||||
"""Apply Non-maximum suppression to prediction outputs."""
|
||||
return ops.non_max_suppression(preds,
|
||||
self.args.conf,
|
||||
self.args.iou,
|
||||
labels=self.lb,
|
||||
nc=self.nc,
|
||||
multi_label=True,
|
||||
agnostic=self.args.single_cls,
|
||||
max_det=self.args.max_det,
|
||||
rotated=True)
|
||||
|
||||
def _process_batch(self, detections, gt_bboxes, gt_cls):
|
||||
"""
|
||||
Return correct prediction matrix.
|
||||
|
||||
Args:
|
||||
detections (torch.Tensor): Tensor of shape [N, 6] representing detections.
|
||||
Each detection is of the format: x1, y1, x2, y2, conf, class.
|
||||
labels (torch.Tensor): Tensor of shape [M, 5] representing labels.
|
||||
Each label is of the format: class, x1, y1, x2, y2.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels.
|
||||
"""
|
||||
iou = batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -2:-1]], dim=-1))
|
||||
return self.match_predictions(detections[:, 5], gt_cls, iou)
|
||||
|
||||
def _prepare_batch(self, si, batch):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx].squeeze(-1)
|
||||
bbox = batch['bboxes'][idx]
|
||||
ori_shape = batch['ori_shape'][si]
|
||||
imgsz = batch['img'].shape[2:]
|
||||
ratio_pad = batch['ratio_pad'][si]
|
||||
if len(cls):
|
||||
bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]) # target boxes
|
||||
ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True) # native-space labels
|
||||
prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
|
||||
return prepared_batch
|
||||
|
||||
def _prepare_pred(self, pred, pbatch):
|
||||
predn = pred.clone()
|
||||
ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'],
|
||||
xywh=True) # native-space pred
|
||||
return predn
|
||||
|
||||
def plot_predictions(self, batch, preds, ni):
|
||||
"""Plots predicted bounding boxes on input images and saves the result."""
|
||||
plot_images(batch['img'],
|
||||
*output_to_rotated_target(preds, max_det=self.args.max_det),
|
||||
paths=batch['im_file'],
|
||||
fname=self.save_dir / f'val_batch{ni}_pred.jpg',
|
||||
names=self.names,
|
||||
on_plot=self.on_plot) # pred
|
||||
|
||||
def pred_to_json(self, predn, filename):
|
||||
"""Serialize YOLO predictions to COCO json format."""
|
||||
stem = Path(filename).stem
|
||||
image_id = int(stem) if stem.isnumeric() else stem
|
||||
rbox = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
|
||||
poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
|
||||
for i, (r, b) in enumerate(zip(rbox.tolist(), poly.tolist())):
|
||||
self.jdict.append({
|
||||
'image_id': image_id,
|
||||
'category_id': self.class_map[int(predn[i, 5].item())],
|
||||
'score': round(predn[i, 4].item(), 5),
|
||||
'rbox': [round(x, 3) for x in r],
|
||||
'poly': [round(x, 3) for x in b]})
|
||||
|
||||
def eval_json(self, stats):
|
||||
"""Evaluates YOLO output in JSON format and returns performance statistics."""
|
||||
if self.args.save_json and self.is_dota and len(self.jdict):
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
pred_json = self.save_dir / 'predictions.json' # predictions
|
||||
pred_txt = self.save_dir / 'predictions_txt' # predictions
|
||||
pred_txt.mkdir(parents=True, exist_ok=True)
|
||||
data = json.load(open(pred_json))
|
||||
# Save split results
|
||||
LOGGER.info(f'Saving predictions with DOTA format to {str(pred_txt)}...')
|
||||
for d in data:
|
||||
image_id = d['image_id']
|
||||
score = d['score']
|
||||
classname = self.names[d['category_id']].replace(' ', '-')
|
||||
|
||||
lines = '{} {} {} {} {} {} {} {} {} {}\n'.format(
|
||||
image_id,
|
||||
score,
|
||||
d['poly'][0],
|
||||
d['poly'][1],
|
||||
d['poly'][2],
|
||||
d['poly'][3],
|
||||
d['poly'][4],
|
||||
d['poly'][5],
|
||||
d['poly'][6],
|
||||
d['poly'][7],
|
||||
)
|
||||
with open(str(pred_txt / f'Task1_{classname}') + '.txt', 'a') as f:
|
||||
f.writelines(lines)
|
||||
# Save merged results, this could result slightly lower map than using official merging script,
|
||||
# because of the probiou calculation.
|
||||
pred_merged_txt = self.save_dir / 'predictions_merged_txt' # predictions
|
||||
pred_merged_txt.mkdir(parents=True, exist_ok=True)
|
||||
merged_results = defaultdict(list)
|
||||
LOGGER.info(f'Saving merged predictions with DOTA format to {str(pred_merged_txt)}...')
|
||||
for d in data:
|
||||
image_id = d['image_id'].split('__')[0]
|
||||
pattern = re.compile(r'\d+___\d+')
|
||||
x, y = (int(c) for c in re.findall(pattern, d['image_id'])[0].split('___'))
|
||||
bbox, score, cls = d['rbox'], d['score'], d['category_id']
|
||||
bbox[0] += x
|
||||
bbox[1] += y
|
||||
bbox.extend([score, cls])
|
||||
merged_results[image_id].append(bbox)
|
||||
for image_id, bbox in merged_results.items():
|
||||
bbox = torch.tensor(bbox)
|
||||
max_wh = torch.max(bbox[:, :2]).item() * 2
|
||||
c = bbox[:, 6:7] * max_wh # classes
|
||||
scores = bbox[:, 5] # scores
|
||||
b = bbox[:, :5].clone()
|
||||
b[:, :2] += c
|
||||
# 0.3 could get results close to the ones from official merging script, even slightly better.
|
||||
i = ops.nms_rotated(b, scores, 0.3)
|
||||
bbox = bbox[i]
|
||||
|
||||
b = ops.xywhr2xyxyxyxy(bbox[:, :5]).view(-1, 8)
|
||||
for x in torch.cat([b, bbox[:, 5:7]], dim=-1).tolist():
|
||||
classname = self.names[int(x[-1])].replace(' ', '-')
|
||||
poly = [round(i, 3) for i in x[:-2]]
|
||||
score = round(x[-2], 3)
|
||||
|
||||
lines = '{} {} {} {} {} {} {} {} {} {}\n'.format(
|
||||
image_id,
|
||||
score,
|
||||
poly[0],
|
||||
poly[1],
|
||||
poly[2],
|
||||
poly[3],
|
||||
poly[4],
|
||||
poly[5],
|
||||
poly[6],
|
||||
poly[7],
|
||||
)
|
||||
with open(str(pred_merged_txt / f'Task1_{classname}') + '.txt', 'a') as f:
|
||||
f.writelines(lines)
|
||||
|
||||
return stats
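
# Illustrative sketch (not part of this diff): each line written to Task1_<classname>.txt above follows
# the DOTA submission convention 'image_id score x1 y1 x2 y2 x3 y3 x4 y4'. Formatting a made-up
# prediction the same way:
image_id, score = 'P0006', 0.873  # hypothetical split image id and confidence
poly = [482.1, 459.9, 539.5, 478.3, 541.9, 501.1, 484.5, 482.7]  # hypothetical corner points
print('{} {} {} {} {} {} {} {} {} {}'.format(image_id, score, *poly))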
@ -66,57 +66,63 @@ class PoseValidator(DetectionValidator):
|
|||
is_pose = self.kpt_shape == [17, 3]
|
||||
nkpt = self.kpt_shape[0]
|
||||
self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
|
||||
self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[])
|
||||
|
||||
def _prepare_batch(self, si, batch):
|
||||
pbatch = super()._prepare_batch(si, batch)
|
||||
kpts = batch['keypoints'][batch['batch_idx'] == si]
|
||||
h, w = pbatch['imgsz']
|
||||
kpts = kpts.clone()
|
||||
kpts[..., 0] *= w
|
||||
kpts[..., 1] *= h
|
||||
kpts = ops.scale_coords(pbatch['imgsz'], kpts, pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'])
|
||||
pbatch['kpts'] = kpts
|
||||
return pbatch
|
||||
|
||||
def _prepare_pred(self, pred, pbatch):
|
||||
predn = super()._prepare_pred(pred, pbatch)
|
||||
nk = pbatch['kpts'].shape[1]
|
||||
pred_kpts = predn[:, 6:].view(len(predn), nk, -1)
|
||||
ops.scale_coords(pbatch['imgsz'], pred_kpts, pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'])
|
||||
return predn, pred_kpts
|
||||
|
||||
def update_metrics(self, preds, batch):
|
||||
"""Metrics."""
|
||||
for si, pred in enumerate(preds):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx]
|
||||
bbox = batch['bboxes'][idx]
|
||||
kpts = batch['keypoints'][idx]
|
||||
nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions
|
||||
nk = kpts.shape[1] # number of keypoints
|
||||
shape = batch['ori_shape'][si]
|
||||
correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
|
||||
correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
|
||||
self.seen += 1
|
||||
|
||||
npr = len(pred)
|
||||
stat = dict(conf=torch.zeros(0, device=self.device),
|
||||
pred_cls=torch.zeros(0, device=self.device),
|
||||
tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
|
||||
tp_p=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
|
||||
pbatch = self._prepare_batch(si, batch)
|
||||
cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
|
||||
nl = len(cls)
|
||||
stat['target_cls'] = cls
|
||||
if npr == 0:
|
||||
if nl:
|
||||
self.stats.append((correct_bboxes, correct_kpts, *torch.zeros(
|
||||
(2, 0), device=self.device), cls.squeeze(-1)))
|
||||
for k in self.stats.keys():
|
||||
self.stats[k].append(stat[k])
|
||||
if self.args.plots:
|
||||
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
|
||||
self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
|
||||
continue
|
||||
|
||||
# Predictions
|
||||
if self.args.single_cls:
|
||||
pred[:, 5] = 0
|
||||
predn = pred.clone()
|
||||
ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
|
||||
ratio_pad=batch['ratio_pad'][si]) # native-space pred
|
||||
pred_kpts = predn[:, 6:].view(npr, nk, -1)
|
||||
ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si])
|
||||
predn, pred_kpts = self._prepare_pred(pred, pbatch)
|
||||
stat['conf'] = predn[:, 4]
|
||||
stat['pred_cls'] = predn[:, 5]
|
||||
|
||||
# Evaluate
|
||||
if nl:
|
||||
height, width = batch['img'].shape[2:]
|
||||
tbox = ops.xywh2xyxy(bbox) * torch.tensor(
|
||||
(width, height, width, height), device=self.device) # target boxes
|
||||
ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
|
||||
ratio_pad=batch['ratio_pad'][si]) # native-space labels
|
||||
tkpts = kpts.clone()
|
||||
tkpts[..., 0] *= width
|
||||
tkpts[..., 1] *= height
|
||||
tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si])
|
||||
labelsn = torch.cat((cls, tbox), 1) # native-space labels
|
||||
correct_bboxes = self._process_batch(predn[:, :6], labelsn)
|
||||
correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts)
|
||||
stat['tp'] = self._process_batch(predn, bbox, cls)
|
||||
stat['tp_p'] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch['kpts'])
|
||||
if self.args.plots:
|
||||
self.confusion_matrix.process_batch(predn, labelsn)
|
||||
self.confusion_matrix.process_batch(predn, bbox, cls)
|
||||
|
||||
# Append correct_masks, correct_boxes, pconf, pcls, tcls
|
||||
self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
|
||||
for k in self.stats.keys():
|
||||
self.stats[k].append(stat[k])
|
||||
|
||||
# Save
|
||||
if self.args.save_json:
|
||||
|
|
@ -124,7 +130,7 @@ class PoseValidator(DetectionValidator):
|
|||
# if self.args.save_txt:
|
||||
# save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
|
||||
|
||||
def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None):
|
||||
def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=None):
|
||||
"""
|
||||
Return correct prediction matrix.
|
||||
|
||||
|
|
@ -142,12 +148,12 @@ class PoseValidator(DetectionValidator):
|
|||
"""
|
||||
if pred_kpts is not None and gt_kpts is not None:
|
||||
# `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
|
||||
area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53
|
||||
area = ops.xyxy2xywh(gt_bboxes)[:, 2:].prod(1) * 0.53
|
||||
iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
|
||||
else: # boxes
|
||||
iou = box_iou(labels[:, 1:], detections[:, :4])
|
||||
iou = box_iou(gt_bboxes, detections[:, :4])
|
||||
|
||||
return self.match_predictions(detections[:, 5], labels[:, 0], iou)
|
||||
return self.match_predictions(detections[:, 5], gt_cls, iou)
|
||||
|
||||
def plot_val_samples(self, batch, ni):
|
||||
"""Plots and saves validation set samples with predicted bounding boxes and keypoints."""
@ -51,6 +51,7 @@ class SegmentationValidator(DetectionValidator):
|
|||
self.process = ops.process_mask_upsample # more accurate
|
||||
else:
|
||||
self.process = ops.process_mask # faster
|
||||
self.stats = dict(tp_m=[], tp=[], conf=[], pred_cls=[], target_cls=[])
|
||||
|
||||
def get_desc(self):
|
||||
"""Return a formatted description of evaluation metrics."""
|
||||
|
|
@ -70,59 +71,62 @@ class SegmentationValidator(DetectionValidator):
|
|||
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
|
||||
return p, proto
|
||||
|
||||
def _prepare_batch(self, si, batch):
|
||||
prepared_batch = super()._prepare_batch(si, batch)
|
||||
midx = [si] if self.args.overlap_mask else batch['batch_idx'] == si
|
||||
prepared_batch['masks'] = batch['masks'][midx]
|
||||
return prepared_batch
|
||||
|
||||
def _prepare_pred(self, pred, pbatch, proto):
|
||||
predn = super()._prepare_pred(pred, pbatch)
|
||||
pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=pbatch['imgsz'])
|
||||
return predn, pred_masks
|
||||
|
||||
def update_metrics(self, preds, batch):
|
||||
"""Metrics."""
|
||||
for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx]
|
||||
bbox = batch['bboxes'][idx]
|
||||
nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions
|
||||
shape = batch['ori_shape'][si]
|
||||
correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
|
||||
correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
|
||||
self.seen += 1
|
||||
|
||||
npr = len(pred)
|
||||
stat = dict(conf=torch.zeros(0, device=self.device),
|
||||
pred_cls=torch.zeros(0, device=self.device),
|
||||
tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
|
||||
tp_m=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
|
||||
pbatch = self._prepare_batch(si, batch)
|
||||
cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
|
||||
nl = len(cls)
|
||||
stat['target_cls'] = cls
|
||||
if npr == 0:
|
||||
if nl:
|
||||
self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
|
||||
(2, 0), device=self.device), cls.squeeze(-1)))
|
||||
for k in self.stats.keys():
|
||||
self.stats[k].append(stat[k])
|
||||
if self.args.plots:
|
||||
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
|
||||
self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
|
||||
continue
|
||||
|
||||
# Masks
|
||||
midx = [si] if self.args.overlap_mask else idx
|
||||
gt_masks = batch['masks'][midx]
|
||||
pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])
|
||||
|
||||
gt_masks = pbatch.pop('masks')
|
||||
# Predictions
|
||||
if self.args.single_cls:
|
||||
pred[:, 5] = 0
|
||||
predn = pred.clone()
|
||||
ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
|
||||
ratio_pad=batch['ratio_pad'][si]) # native-space pred
|
||||
predn, pred_masks = self._prepare_pred(pred, pbatch, proto)
|
||||
stat['conf'] = predn[:, 4]
|
||||
stat['pred_cls'] = predn[:, 5]
|
||||
|
||||
# Evaluate
|
||||
if nl:
|
||||
height, width = batch['img'].shape[2:]
|
||||
tbox = ops.xywh2xyxy(bbox) * torch.tensor(
|
||||
(width, height, width, height), device=self.device) # target boxes
|
||||
ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
|
||||
ratio_pad=batch['ratio_pad'][si]) # native-space labels
|
||||
labelsn = torch.cat((cls, tbox), 1) # native-space labels
|
||||
correct_bboxes = self._process_batch(predn, labelsn)
|
||||
# TODO: maybe remove these `self.` arguments as they already are member variable
|
||||
correct_masks = self._process_batch(predn,
|
||||
labelsn,
|
||||
pred_masks,
|
||||
gt_masks,
|
||||
overlap=self.args.overlap_mask,
|
||||
masks=True)
|
||||
stat['tp'] = self._process_batch(predn, bbox, cls)
|
||||
stat['tp_m'] = self._process_batch(predn,
|
||||
bbox,
|
||||
cls,
|
||||
pred_masks,
|
||||
gt_masks,
|
||||
self.args.overlap_mask,
|
||||
masks=True)
|
||||
if self.args.plots:
|
||||
self.confusion_matrix.process_batch(predn, labelsn)
|
||||
self.confusion_matrix.process_batch(predn, bbox, cls)
|
||||
|
||||
# Append correct_masks, correct_boxes, pconf, pcls, tcls
|
||||
self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
|
||||
for k in self.stats.keys():
|
||||
self.stats[k].append(stat[k])
|
||||
|
||||
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
|
||||
if self.args.plots and self.batch_i < 3:
|
||||
|
|
@ -131,7 +135,7 @@ class SegmentationValidator(DetectionValidator):
|
|||
# Save
|
||||
if self.args.save_json:
|
||||
pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
|
||||
shape,
|
||||
pbatch['ori_shape'],
|
||||
ratio_pad=batch['ratio_pad'][si])
|
||||
self.pred_to_json(predn, batch['im_file'][si], pred_masks)
|
||||
# if self.args.save_txt:
|
||||
|
|
@ -142,7 +146,7 @@ class SegmentationValidator(DetectionValidator):
|
|||
self.metrics.speed = self.speed
|
||||
self.metrics.confusion_matrix = self.confusion_matrix
|
||||
|
||||
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
|
||||
def _process_batch(self, detections, gt_bboxes, gt_cls, pred_masks=None, gt_masks=None, overlap=False, masks=False):
|
||||
"""
|
||||
Return correct prediction matrix.
|
||||
|
||||
|
|
@ -155,7 +159,7 @@ class SegmentationValidator(DetectionValidator):
|
|||
"""
|
||||
if masks:
|
||||
if overlap:
|
||||
nl = len(labels)
|
||||
nl = len(gt_cls)
|
||||
index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
|
||||
gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
|
||||
gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
|
||||
|
|
@ -164,9 +168,9 @@ class SegmentationValidator(DetectionValidator):
|
|||
gt_masks = gt_masks.gt_(0.5)
|
||||
iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
|
||||
else: # boxes
|
||||
iou = box_iou(labels[:, 1:], detections[:, :4])
|
||||
iou = box_iou(gt_bboxes, detections[:, :4])
|
||||
|
||||
return self.match_predictions(detections[:, 5], labels[:, 0], iou)
|
||||
return self.match_predictions(detections[:, 5], gt_cls, iou)
|
||||
|
||||
def plot_val_samples(self, batch, ni):
|
||||
"""Plots validation samples with bounding box labels."""
|
||||
|
|
@ -174,7 +178,7 @@ class SegmentationValidator(DetectionValidator):
|
|||
batch['batch_idx'],
|
||||
batch['cls'].squeeze(-1),
|
||||
batch['bboxes'],
|
||||
batch['masks'],
|
||||
masks=batch['masks'],
|
||||
paths=batch['im_file'],
|
||||
fname=self.save_dir / f'val_batch{ni}_labels.jpg',
|
||||
names=self.names,
@ -21,7 +21,7 @@ from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP,
|
|||
HGBlock, HGStem, Proto, RepC3, ResNetLayer)
|
||||
from .conv import (CBAM, ChannelAttention, Concat, Conv, Conv2, ConvTranspose, DWConv, DWConvTranspose2d, Focus,
|
||||
GhostConv, LightConv, RepConv, SpatialAttention)
|
||||
from .head import Classify, Detect, Pose, RTDETRDecoder, Segment
|
||||
from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment
|
||||
from .transformer import (AIFI, MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer, LayerNorm2d,
|
||||
MLPBlock, MSDeformAttn, TransformerBlock, TransformerEncoderLayer, TransformerLayer)
|
||||
|
||||
|
|
@ -30,4 +30,5 @@ __all__ = ('Conv', 'Conv2', 'LightConv', 'RepConv', 'DWConv', 'DWConvTranspose2d
|
|||
'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3',
|
||||
'C2f', 'C3x', 'C3TR', 'C3Ghost', 'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'Detect',
|
||||
'Segment', 'Pose', 'Classify', 'TransformerEncoderLayer', 'RepC3', 'RTDETRDecoder', 'AIFI',
|
||||
'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP', 'ResNetLayer')
|
||||
'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP', 'ResNetLayer',
|
||||
'OBB')
|
||||
|
|
|
|||
|
|
@ -7,14 +7,14 @@ import torch
|
|||
import torch.nn as nn
|
||||
from torch.nn.init import constant_, xavier_uniform_
|
||||
|
||||
from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors
|
||||
from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
|
||||
|
||||
from .block import DFL, Proto
|
||||
from .conv import Conv
|
||||
from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
|
||||
from .utils import bias_init_with_prob, linear_init_
|
||||
|
||||
__all__ = 'Detect', 'Segment', 'Pose', 'Classify', 'RTDETRDecoder'
|
||||
__all__ = 'Detect', 'Segment', 'Pose', 'Classify', 'OBB', 'RTDETRDecoder'
|
||||
|
||||
|
||||
class Detect(nn.Module):
|
||||
|
|
@ -41,22 +41,24 @@ class Detect(nn.Module):
|
|||
|
||||
def forward(self, x):
|
||||
"""Concatenates and returns predicted bounding boxes and class probabilities."""
|
||||
shape = x[0].shape # BCHW
|
||||
for i in range(self.nl):
|
||||
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
|
||||
if self.training:
|
||||
if self.training: # Training path
|
||||
return x
|
||||
elif self.dynamic or self.shape != shape:
|
||||
|
||||
# Inference path
|
||||
shape = x[0].shape # BCHW
|
||||
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
|
||||
if self.dynamic or self.shape != shape:
|
||||
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
|
||||
self.shape = shape
|
||||
|
||||
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
|
||||
if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops
|
||||
box = x_cat[:, :self.reg_max * 4]
|
||||
cls = x_cat[:, self.reg_max * 4:]
|
||||
else:
|
||||
box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
|
||||
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
|
||||
dbox = self.decode_bboxes(box)
|
||||
|
||||
if self.export and self.format in ('tflite', 'edgetpu'):
|
||||
# Normalize xywh with image size to mitigate quantization error of TFLite integer models as done in YOLOv5:
|
||||
|
|
@ -79,6 +81,10 @@ class Detect(nn.Module):
|
|||
a[-1].bias.data[:] = 1.0 # box
|
||||
b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
|
||||
|
||||
def decode_bboxes(self, bboxes):
|
||||
"""Decode bounding boxes."""
|
||||
return dist2bbox(self.dfl(bboxes), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
|
||||
|
||||
|
||||
class Segment(Detect):
|
||||
"""YOLOv8 Segment head for segmentation models."""
|
||||
|
|
@ -106,6 +112,35 @@ class Segment(Detect):
|
|||
return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
|
||||
|
||||
|
||||
class OBB(Detect):
|
||||
"""YOLOv8 OBB detection head for detection with rotation models."""
|
||||
|
||||
def __init__(self, nc=80, ne=1, ch=()):
|
||||
super().__init__(nc, ch)
|
||||
self.ne = ne # number of extra parameters
|
||||
self.detect = Detect.forward
|
||||
|
||||
c4 = max(ch[0] // 4, self.ne)
|
||||
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
|
||||
|
||||
def forward(self, x):
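"""Concatenate and return predicted rotated bounding boxes and class probabilities."""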
|
||||
bs = x[0].shape[0] # batch size
|
||||
angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2) # OBB theta logits
|
||||
# NOTE: set `angle` as an attribute so that `decode_bboxes` could use it.
|
||||
angle = (angle.sigmoid() - 0.25) * math.pi # [-pi/4, 3pi/4]
|
||||
# angle = angle.sigmoid() * math.pi / 2 # [0, pi/2]
|
||||
if not self.training:
|
||||
self.angle = angle
|
||||
x = self.detect(self, x)
|
||||
if self.training:
|
||||
return x, angle
|
||||
return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
|
||||
|
||||
def decode_bboxes(self, bboxes):
|
||||
"""Decode rotated bounding boxes."""
|
||||
return dist2rbox(self.dfl(bboxes), self.angle, self.anchors.unsqueeze(0), dim=1) * self.strides
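
# Illustrative sketch (not part of this diff): the OBB head above squashes raw angle logits into
# [-pi/4, 3pi/4) with (sigmoid(x) - 0.25) * pi before dist2rbox consumes them, so every box gets a
# single unambiguous orientation over a half turn. A quick check of that mapping:
import math
import torch

logits = torch.tensor([-10.0, 0.0, 10.0])
print((logits.sigmoid() - 0.25) * math.pi)  # approximately [-pi/4, pi/4, 3pi/4]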
|
||||
|
||||
|
||||
class Pose(Detect):
|
||||
"""YOLOv8 Pose head for keypoints models."""
@ -7,13 +7,13 @@ from pathlib import Path
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from ultralytics.nn.modules import (AIFI, C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x,
|
||||
Classify, Concat, Conv, Conv2, ConvTranspose, Detect, DWConv, DWConvTranspose2d,
|
||||
Focus, GhostBottleneck, GhostConv, HGBlock, HGStem, Pose, RepC3, RepConv,
|
||||
ResNetLayer, RTDETRDecoder, Segment)
|
||||
from ultralytics.nn.modules import (AIFI, C1, C2, C3, C3TR, OBB, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost,
|
||||
C3x, Classify, Concat, Conv, Conv2, ConvTranspose, Detect, DWConv,
|
||||
DWConvTranspose2d, Focus, GhostBottleneck, GhostConv, HGBlock, HGStem, Pose, RepC3,
|
||||
RepConv, ResNetLayer, RTDETRDecoder, Segment)
|
||||
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
|
||||
from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
|
||||
from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8PoseLoss, v8SegmentationLoss
|
||||
from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss
|
||||
from ultralytics.utils.plotting import feature_visualization
|
||||
from ultralytics.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights, intersect_dicts,
|
||||
make_divisible, model_info, scale_img, time_sync)
|
||||
|
|
@ -241,10 +241,10 @@ class DetectionModel(BaseModel):
|
|||
|
||||
# Build strides
|
||||
m = self.model[-1] # Detect()
|
||||
if isinstance(m, (Detect, Segment, Pose)):
|
||||
if isinstance(m, (Detect, Segment, Pose, OBB)):
|
||||
s = 256 # 2x min stride
|
||||
m.inplace = self.inplace
|
||||
forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
|
||||
forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose, OBB)) else self.forward(x)
|
||||
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
|
||||
self.stride = m.stride
|
||||
m.bias_init() # only run once
|
||||
|
|
@ -298,6 +298,17 @@ class DetectionModel(BaseModel):
|
|||
return v8DetectionLoss(self)
|
||||
|
||||
|
||||
class OBBModel(DetectionModel):
|
||||
""""YOLOv8 Oriented Bounding Box (OBB) model."""
|
||||
|
||||
def __init__(self, cfg='yolov8n-obb.yaml', ch=3, nc=None, verbose=True):
|
||||
"""Initialize YOLOv8 OBB model with given config and parameters."""
|
||||
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
|
||||
|
||||
def init_criterion(self):
|
||||
return v8OBBLoss(self)
|
||||
|
||||
|
||||
class SegmentationModel(DetectionModel):
|
||||
"""YOLOv8 segmentation model."""
|
||||
|
||||
|
|
@ -616,7 +627,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
|
|||
# Module updates
|
||||
for m in ensemble.modules():
|
||||
t = type(m)
|
||||
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment):
|
||||
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
|
||||
m.inplace = inplace
|
||||
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
|
||||
m.recompute_scale_factor = None # torch 1.11.0 compatibility
|
||||
|
|
@ -652,7 +663,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
|
|||
# Module updates
|
||||
for m in model.modules():
|
||||
t = type(m)
|
||||
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment):
|
||||
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
|
||||
m.inplace = inplace
|
||||
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
|
||||
m.recompute_scale_factor = None # torch 1.11.0 compatibility
|
||||
|
|
@ -717,7 +728,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
|
|||
args = [ch[f]]
|
||||
elif m is Concat:
|
||||
c2 = sum(ch[x] for x in f)
|
||||
elif m in (Detect, Segment, Pose):
|
||||
elif m in (Detect, Segment, Pose, OBB):
|
||||
args.append([ch[x] for x in f])
|
||||
if m is Segment:
|
||||
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
|
||||
|
|
@ -801,6 +812,8 @@ def guess_model_task(model):
|
|||
return 'segment'
|
||||
if m == 'pose':
|
||||
return 'pose'
|
||||
if m == 'obb':
|
||||
return 'obb'
|
||||
|
||||
# Guess from model cfg
|
||||
if isinstance(model, dict):
|
||||
|
|
@ -825,6 +838,8 @@ def guess_model_task(model):
|
|||
return 'classify'
|
||||
elif isinstance(m, Pose):
|
||||
return 'pose'
|
||||
elif isinstance(m, OBB):
|
||||
return 'obb'
|
||||
|
||||
# Guess from model filename
|
||||
if isinstance(model, (str, Path)):
|
||||
|
|
@ -835,10 +850,12 @@ def guess_model_task(model):
|
|||
return 'classify'
|
||||
elif '-pose' in model.stem or 'pose' in model.parts:
|
||||
return 'pose'
|
||||
elif '-obb' in model.stem or 'obb' in model.parts:
|
||||
return 'obb'
|
||||
elif 'detect' in model.parts:
|
||||
return 'detect'
|
||||
|
||||
# Unable to determine task from model
|
||||
LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
|
||||
"Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
|
||||
"Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.")
|
||||
return 'detect' # assume detect
@ -24,6 +24,8 @@ def on_predict_start(predictor: object, persist: bool = False) -> None:
|
|||
Raises:
|
||||
AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'.
|
||||
"""
|
||||
if predictor.args.task == 'obb':
|
||||
raise NotImplementedError('ERROR ❌ OBB task does not support track mode!')
|
||||
if hasattr(predictor, 'trackers') and persist:
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from typing import List
|
|||
|
||||
import numpy as np
|
||||
|
||||
from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
|
||||
from .ops import ltwh2xywh, ltwh2xyxy, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
|
||||
|
||||
|
||||
def _ntuple(n):
|
||||
|
|
@ -212,19 +212,9 @@ class Instances:
|
|||
segments (list | ndarray): segments.
|
||||
keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
|
||||
"""
|
||||
if segments is None:
|
||||
segments = []
|
||||
self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
|
||||
self.keypoints = keypoints
|
||||
self.normalized = normalized
|
||||
|
||||
if len(segments) > 0:
|
||||
# List[np.array(1000, 2)] * num_samples
|
||||
segments = resample_segments(segments)
|
||||
# (N, 1000, 2)
|
||||
segments = np.stack(segments, axis=0)
|
||||
else:
|
||||
segments = np.zeros((0, 1000, 2), dtype=np.float32)
|
||||
self.segments = segments
|
||||
|
||||
def convert_bbox(self, format):
|
||||
|
@ -6,9 +6,9 @@ import torch.nn.functional as F
|
|||
|
||||
from ultralytics.utils.metrics import OKS_SIGMA
|
||||
from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
|
||||
from ultralytics.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors
|
||||
from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
|
||||
|
||||
from .metrics import bbox_iou
|
||||
from .metrics import bbox_iou, probiou
|
||||
from .tal import bbox2dist
|
||||
|
||||
|
||||
|
|
@ -95,6 +95,30 @@ class BboxLoss(nn.Module):
|
|||
F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True)
|
||||
|
||||
|
||||
class RotatedBboxLoss(BboxLoss):
|
||||
"""Criterion class for computing training losses during training."""
|
||||
|
||||
def __init__(self, reg_max, use_dfl=False):
|
||||
"""Initialize the BboxLoss module with regularization maximum and DFL settings."""
|
||||
super().__init__(reg_max, use_dfl)
|
||||
|
||||
def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
|
||||
"""IoU loss."""
|
||||
weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
|
||||
iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
|
||||
loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum
|
||||
|
||||
# DFL loss
|
||||
if self.use_dfl:
|
||||
target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.reg_max)
|
||||
loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
|
||||
loss_dfl = loss_dfl.sum() / target_scores_sum
|
||||
else:
|
||||
loss_dfl = torch.tensor(0.0).to(pred_dist.device)
|
||||
|
||||
return loss_iou, loss_dfl
|
||||
|
||||
|
||||
class KeypointLoss(nn.Module):
|
||||
"""Criterion class for computing training losses."""
|
||||
|
||||
|
|
@ -243,9 +267,9 @@ class v8SegmentationLoss(v8DetectionLoss):
|
|||
except RuntimeError as e:
|
||||
raise TypeError('ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n'
|
||||
"This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
|
||||
"i.e. 'yolo train model=yolov8n-seg.pt data=coco128.yaml'.\nVerify your dataset is a "
|
||||
"correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
|
||||
'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e
|
||||
"i.e. 'yolo train model=yolov8n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
|
||||
"correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
|
||||
'as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help.') from e
|
||||
|
||||
# Pboxes
|
||||
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
|
||||
|
|
@ -526,3 +550,109 @@ class v8ClassificationLoss:
|
|||
loss = torch.nn.functional.cross_entropy(preds, batch['cls'], reduction='mean')
|
||||
loss_items = loss.detach()
|
||||
return loss, loss_items
|
||||
|
||||
|
||||
class v8OBBLoss(v8DetectionLoss):
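"""Criterion class for computing training losses for rotated (OBB) detection models."""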
|
||||
|
||||
def __init__(self, model): # model must be de-paralleled
|
||||
super().__init__(model)
|
||||
self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
|
||||
self.bbox_loss = RotatedBboxLoss(self.reg_max - 1, use_dfl=self.use_dfl).to(self.device)
|
||||
|
||||
def preprocess(self, targets, batch_size, scale_tensor):
|
||||
"""Preprocesses the target counts and matches with the input batch size to output a tensor."""
|
||||
if targets.shape[0] == 0:
|
||||
out = torch.zeros(batch_size, 0, 6, device=self.device)
|
||||
else:
|
||||
i = targets[:, 0] # image index
|
||||
_, counts = i.unique(return_counts=True)
|
||||
counts = counts.to(dtype=torch.int32)
|
||||
out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
|
||||
for j in range(batch_size):
|
||||
matches = i == j
|
||||
n = matches.sum()
|
||||
if n:
|
||||
bboxes = targets[matches, 2:]
|
||||
bboxes[..., :4].mul_(scale_tensor)
|
||||
out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
|
||||
return out
|
||||
|
||||
def __call__(self, preds, batch):
|
||||
"""Calculate and return the loss for the YOLO model."""
|
||||
loss = torch.zeros(3, device=self.device) # box, cls, dfl
|
||||
feats, pred_angle = preds if isinstance(preds[0], list) else preds[1]
|
||||
batch_size = pred_angle.shape[0] # batch size
|
||||
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
|
||||
(self.reg_max * 4, self.nc), 1)
|
||||
|
||||
# b, grids, ..
|
||||
pred_scores = pred_scores.permute(0, 2, 1).contiguous()
|
||||
pred_distri = pred_distri.permute(0, 2, 1).contiguous()
|
||||
pred_angle = pred_angle.permute(0, 2, 1).contiguous()
|
||||
|
||||
dtype = pred_scores.dtype
|
||||
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
|
||||
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
|
||||
|
||||
# targets
|
||||
try:
|
||||
batch_idx = batch['batch_idx'].view(-1, 1)
|
||||
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes'].view(-1, 5)), 1)
|
||||
rw, rh = targets[:, 4] * imgsz[0].item(), targets[:, 5] * imgsz[1].item()
|
||||
targets = targets[(rw >= 2) & (rh >= 2)] # filter rboxes of tiny size to stabilize training
|
||||
targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
|
||||
gt_labels, gt_bboxes = targets.split((1, 5), 2) # cls, xywhr
|
||||
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
|
||||
except RuntimeError as e:
|
||||
raise TypeError('ERROR ❌ OBB dataset incorrectly formatted or not an OBB dataset.\n'
"This error can occur when incorrectly training an 'OBB' model on a 'detect' dataset, "
"i.e. 'yolo train model=yolov8n-obb.pt data=coco8.yaml'.\nVerify your dataset is a "
"correctly formatted 'OBB' dataset using 'data=coco8-obb.yaml' "
'as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help.') from e
|
||||
|
||||
# Pboxes
|
||||
pred_bboxes = self.bbox_decode(anchor_points, pred_distri, pred_angle) # xyxy, (b, h*w, 4)
|
||||
|
||||
bboxes_for_assigner = pred_bboxes.clone().detach()
|
||||
# Only the first four elements need to be scaled
|
||||
bboxes_for_assigner[..., :4] *= stride_tensor
|
||||
_, target_bboxes, target_scores, fg_mask, _ = self.assigner(pred_scores.detach().sigmoid(),
|
||||
bboxes_for_assigner.type(gt_bboxes.dtype),
|
||||
anchor_points * stride_tensor, gt_labels, gt_bboxes,
|
||||
mask_gt)
|
||||
|
||||
target_scores_sum = max(target_scores.sum(), 1)
|
||||
|
||||
# Cls loss
|
||||
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
|
||||
loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
|
||||
|
||||
# Bbox loss
|
||||
if fg_mask.sum():
|
||||
target_bboxes[..., :4] /= stride_tensor
|
||||
loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
|
||||
target_scores_sum, fg_mask)
|
||||
else:
|
||||
loss[0] += (pred_angle * 0).sum()
|
||||
|
||||
loss[0] *= self.hyp.box # box gain
|
||||
loss[1] *= self.hyp.cls # cls gain
|
||||
loss[2] *= self.hyp.dfl # dfl gain
|
||||
|
||||
return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl)
|
||||
|
||||
def bbox_decode(self, anchor_points, pred_dist, pred_angle):
|
||||
"""
|
||||
Decode predicted object bounding box coordinates from anchor points and distribution.
|
||||
|
||||
Args:
|
||||
anchor_points (torch.Tensor): Anchor points, (h*w, 2).
|
||||
pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
|
||||
pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).
|
||||
Returns:
|
||||
(torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
|
||||
"""
|
||||
if self.use_dfl:
|
||||
b, a, c = pred_dist.shape # batch, anchors, channels
|
||||
pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
|
||||
return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)
@ -165,6 +165,92 @@ def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
|
|||
return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps)
|
||||
|
||||
|
||||
def _get_covariance_matrix(boxes):
|
||||
"""
|
||||
Generate covariance matrices from oriented bounding boxes (OBBs).

Args:
boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format.

Returns:
(torch.Tensor): Covariance matrices corresponding to the original rotated bounding boxes.
"""
# Gaussian bounding boxes; the center points (first two columns) are ignored because they are not needed here.
|
||||
gbbs = torch.cat((torch.pow(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), dim=-1)
|
||||
a, b, c = gbbs.split(1, dim=-1)
|
||||
return (
|
||||
a * torch.cos(c) ** 2 + b * torch.sin(c) ** 2,
|
||||
a * torch.sin(c) ** 2 + b * torch.cos(c) ** 2,
|
||||
a * torch.cos(c) * torch.sin(c) - b * torch.sin(c) * torch.cos(c),
|
||||
)
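
# Illustrative sketch (not part of this diff): the three values returned above are the entries
# (a, b, c) = (Sigma_xx, Sigma_yy, Sigma_xy) of the 2x2 covariance Sigma = R(theta) @ diag(w**2 / 12,
# h**2 / 12) @ R(theta).T of a uniform distribution over the rotated box. A check against the explicit
# rotation-matrix form, with invented values:
import math
import torch
from ultralytics.utils.metrics import _get_covariance_matrix

theta = 0.5  # invented rotation in radians
box = torch.tensor([[0.0, 0.0, 4.0, 2.0, theta]])  # xywhr
a, b, c = _get_covariance_matrix(box)
r = torch.tensor([[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]])
sigma = r @ torch.diag(torch.tensor([4.0 ** 2 / 12, 2.0 ** 2 / 12])) @ r.T
print(a.item(), b.item(), c.item())  # equal to sigma[0, 0], sigma[1, 1], sigma[0, 1]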
|
||||
|
||||
|
||||
def probiou(obb1, obb2, CIoU=False, eps=1e-7):
|
||||
"""
|
||||
Calculate probabilistic IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.
|
||||
|
||||
Args:
|
||||
obb1 (torch.Tensor): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
|
||||
obb2 (torch.Tensor): A tensor of shape (N, 5) representing predicted obbs, with xywhr format.
|
||||
eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): A tensor of shape (N, ) representing obb similarities.
|
||||
"""
|
||||
x1, y1 = obb1[..., :2].split(1, dim=-1)
|
||||
x2, y2 = obb2[..., :2].split(1, dim=-1)
|
||||
a1, b1, c1 = _get_covariance_matrix(obb1)
|
||||
a2, b2, c2 = _get_covariance_matrix(obb2)
|
||||
|
||||
t1 = (((a1 + a2) * (torch.pow(y1 - y2, 2)) + (b1 + b2) * (torch.pow(x1 - x2, 2))) /
|
||||
((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.25
|
||||
t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.5
|
||||
t3 = torch.log(((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2))) /
|
||||
(4 * torch.sqrt((a1 * b1 - torch.pow(c1, 2)).clamp_(0) *
|
||||
(a2 * b2 - torch.pow(c2, 2)).clamp_(0)) + eps) + eps) * 0.5
|
||||
bd = t1 + t2 + t3
|
||||
bd = torch.clamp(bd, eps, 100.0)
|
||||
hd = torch.sqrt(1.0 - torch.exp(-bd) + eps)
|
||||
iou = 1 - hd
|
||||
if CIoU: # only include the wh aspect ratio part
|
||||
w1, h1 = obb1[..., 2:4].split(1, dim=-1)
|
||||
w2, h2 = obb2[..., 2:4].split(1, dim=-1)
|
||||
v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
|
||||
with torch.no_grad():
|
||||
alpha = v / (v - iou + (1 + eps))
|
||||
return iou - v * alpha # CIoU
|
||||
return iou
|
||||
|
||||
|
||||
def batch_probiou(obb1, obb2, eps=1e-7):
|
||||
"""
|
||||
Calculate probabilistic IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.
|
||||
|
||||
Args:
|
||||
obb1 (torch.Tensor): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
|
||||
obb2 (torch.Tensor): A tensor of shape (M, 5) representing predicted obbs, with xywhr format.
|
||||
eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): A tensor of shape (N, M) representing obb similarities.
|
||||
"""
|
||||
x1, y1 = obb1[..., :2].split(1, dim=-1)
|
||||
x2, y2 = (x.squeeze(-1)[None] for x in obb2[..., :2].split(1, dim=-1))
|
||||
a1, b1, c1 = _get_covariance_matrix(obb1)
|
||||
a2, b2, c2 = (x.squeeze(-1)[None] for x in _get_covariance_matrix(obb2))
|
||||
|
||||
t1 = (((a1 + a2) * (torch.pow(y1 - y2, 2)) + (b1 + b2) * (torch.pow(x1 - x2, 2))) /
|
||||
((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.25
|
||||
t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.5
|
||||
t3 = torch.log(((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2))) /
|
||||
(4 * torch.sqrt((a1 * b1 - torch.pow(c1, 2)).clamp_(0) *
|
||||
(a2 * b2 - torch.pow(c2, 2)).clamp_(0)) + eps) + eps) * 0.5
|
||||
bd = t1 + t2 + t3
|
||||
bd = torch.clamp(bd, eps, 100.0)
|
||||
hd = torch.sqrt(1.0 - torch.exp(-bd) + eps)
|
||||
return 1 - hd
|
||||
|
||||
|
||||
def smooth_BCE(eps=0.1):
|
||||
"""
|
||||
Computes smoothed positive and negative Binary Cross-Entropy targets.
|
||||
|
|
@ -213,17 +299,17 @@ class ConfusionMatrix:
|
|||
for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
|
||||
self.matrix[p][t] += 1
|
||||
|
||||
def process_batch(self, detections, labels):
|
||||
def process_batch(self, detections, gt_bboxes, gt_cls):
|
||||
"""
|
||||
Update confusion matrix for object detection task.
|
||||
|
||||
Args:
|
||||
detections (Array[N, 6]): Detected bounding boxes and their associated information.
|
||||
Each row should contain (x1, y1, x2, y2, conf, class).
|
||||
labels (Array[M, 5]): Ground truth bounding boxes and their associated class labels.
|
||||
Each row should contain (class, x1, y1, x2, y2).
|
||||
gt_bboxes (Array[M, 4]): Ground truth bounding boxes with xyxy format.
|
||||
gt_cls (Array[M]): The class labels.
|
||||
"""
|
||||
if labels.size(0) == 0: # Check if labels is empty
|
||||
if gt_cls.size(0) == 0: # Check if labels is empty
|
||||
if detections is not None:
|
||||
detections = detections[detections[:, 4] > self.conf]
|
||||
detection_classes = detections[:, 5].int()
|
||||
|
|
@ -231,15 +317,15 @@ class ConfusionMatrix:
|
|||
self.matrix[dc, self.nc] += 1 # false positives
|
||||
return
|
||||
if detections is None:
|
||||
gt_classes = labels.int()
|
||||
gt_classes = gt_cls.int()
|
||||
for gc in gt_classes:
|
||||
self.matrix[self.nc, gc] += 1 # background FN
|
||||
return
|
||||
|
||||
detections = detections[detections[:, 4] > self.conf]
|
||||
gt_classes = labels[:, 0].int()
|
||||
gt_classes = gt_cls.int()
|
||||
detection_classes = detections[:, 5].int()
|
||||
iou = box_iou(labels[:, 1:], detections[:, :4])
|
||||
iou = box_iou(gt_bboxes, detections[:, :4])
|
||||
|
||||
x = torch.where(iou > self.iou_thres)
|
||||
if x[0].shape[0]:
|
||||
|
|
@ -814,12 +900,12 @@ class SegmentMetrics(SimpleClass):
|
|||
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
|
||||
self.task = 'segment'
|
||||
|
||||
def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
|
||||
def process(self, tp, tp_m, conf, pred_cls, target_cls):
|
||||
"""
|
||||
Processes the detection and segmentation metrics over the given set of predictions.
|
||||
|
||||
Args:
|
||||
tp_b (list): List of True Positive boxes.
|
||||
tp (list): List of True Positive boxes.
|
||||
tp_m (list): List of True Positive masks.
|
||||
conf (list): List of confidence scores.
|
||||
pred_cls (list): List of predicted classes.
|
||||
|
|
@ -837,7 +923,7 @@ class SegmentMetrics(SimpleClass):
|
|||
prefix='Mask')[2:]
|
||||
self.seg.nc = len(self.names)
|
||||
self.seg.update(results_mask)
|
||||
results_box = ap_per_class(tp_b,
|
||||
results_box = ap_per_class(tp,
|
||||
conf,
|
||||
pred_cls,
|
||||
target_cls,
|
||||
|
|
@ -938,12 +1024,12 @@ class PoseMetrics(SegmentMetrics):
|
|||
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
|
||||
self.task = 'pose'
|
||||
|
||||
def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
|
||||
def process(self, tp, tp_p, conf, pred_cls, target_cls):
|
||||
"""
|
||||
Processes the detection and pose metrics over the given set of predictions.
|
||||
|
||||
Args:
|
||||
tp_b (list): List of True Positive boxes.
|
||||
tp (list): List of True Positive boxes.
|
||||
tp_p (list): List of True Positive keypoints.
|
||||
conf (list): List of confidence scores.
|
||||
pred_cls (list): List of predicted classes.
|
||||
|
|
@ -961,7 +1047,7 @@ class PoseMetrics(SegmentMetrics):
|
|||
prefix='Pose')[2:]
|
||||
self.pose.nc = len(self.names)
|
||||
self.pose.update(results_pose)
|
||||
results_box = ap_per_class(tp_b,
|
||||
results_box = ap_per_class(tp,
|
||||
conf,
|
||||
pred_cls,
|
||||
target_cls,
|
||||
|
|
@ -1067,3 +1153,70 @@ class ClassifyMetrics(SimpleClass):
|
|||
def curves_results(self):
|
||||
"""Returns a list of curves for accessing specific metrics curves."""
|
||||
return []
|
||||
|
||||
|
||||
class OBBMetrics(SimpleClass):
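"""Metric class for computing evaluation metrics for oriented bounding box (OBB) models."""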
|
||||
|
||||
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
|
||||
self.save_dir = save_dir
|
||||
self.plot = plot
|
||||
self.on_plot = on_plot
|
||||
self.names = names
|
||||
self.box = Metric()
|
||||
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
|
||||
|
||||
def process(self, tp, conf, pred_cls, target_cls):
|
||||
"""Process predicted results for object detection and update metrics."""
|
||||
results = ap_per_class(tp,
|
||||
conf,
|
||||
pred_cls,
|
||||
target_cls,
|
||||
plot=self.plot,
|
||||
save_dir=self.save_dir,
|
||||
names=self.names,
|
||||
on_plot=self.on_plot)[2:]
|
||||
self.box.nc = len(self.names)
|
||||
self.box.update(results)
|
||||
|
||||
@property
|
||||
def keys(self):
|
||||
"""Returns a list of keys for accessing specific metrics."""
|
||||
return ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
|
||||
|
||||
def mean_results(self):
|
||||
"""Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
|
||||
return self.box.mean_results()
|
||||
|
||||
def class_result(self, i):
|
||||
"""Return the result of evaluating the performance of an object detection model on a specific class."""
|
||||
return self.box.class_result(i)
|
||||
|
||||
@property
|
||||
def maps(self):
|
||||
"""Returns mean Average Precision (mAP) scores per class."""
|
||||
return self.box.maps
|
||||
|
||||
@property
|
||||
def fitness(self):
|
||||
"""Returns the fitness of box object."""
|
||||
return self.box.fitness()
|
||||
|
||||
@property
|
||||
def ap_class_index(self):
|
||||
"""Returns the average precision index per class."""
|
||||
return self.box.ap_class_index
|
||||
|
||||
@property
|
||||
def results_dict(self):
|
||||
"""Returns dictionary of computed performance metrics and statistics."""
|
||||
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
|
||||
|
||||
@property
|
||||
def curves(self):
|
||||
"""Returns a list of curves for accessing specific metrics curves."""
|
||||
return []
|
||||
|
||||
@property
|
||||
def curves_results(self):
|
||||
"""Returns a list of curves for accessing specific metrics curves."""
|
||||
return []
@ -12,6 +12,7 @@ import torch.nn.functional as F
|
|||
import torchvision
|
||||
|
||||
from ultralytics.utils import LOGGER
|
||||
from ultralytics.utils.metrics import batch_probiou
|
||||
|
||||
|
||||
class Profile(contextlib.ContextDecorator):
|
||||
|
|
@ -80,10 +81,10 @@ def segment2box(segment, width=640, height=640):
|
|||
4, dtype=segment.dtype) # xyxy
|
||||
|
||||
|
||||
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
||||
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
|
||||
"""
|
||||
Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
|
||||
(img1_shape) to the shape of a different image (img0_shape).
|
||||
Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
|
||||
specified in (img1_shape) to the shape of a different image (img0_shape).
|
||||
|
||||
Args:
|
||||
img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
|
||||
|
|
@ -93,6 +94,7 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
|||
calculated based on the size difference between the two images.
|
||||
padding (bool): If True, assume the boxes are based on an image letterboxed in YOLO style. If False, do regular
rescaling.
xywh (bool): Whether the box format is xywh, default=False.
|
||||
|
||||
Returns:
|
||||
boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
|
||||
|
|
@ -106,8 +108,11 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
|||
pad = ratio_pad[1]
|
||||
|
||||
if padding:
|
||||
boxes[..., [0, 2]] -= pad[0] # x padding
|
||||
boxes[..., [1, 3]] -= pad[1] # y padding
|
||||
boxes[..., 0] -= pad[0] # x padding
|
||||
boxes[..., 1] -= pad[1] # y padding
|
||||
if not xywh:
|
||||
boxes[..., 2] -= pad[0] # x padding
|
||||
boxes[..., 3] -= pad[1] # y padding
|
||||
boxes[..., :4] /= gain
|
||||
return clip_boxes(boxes, img0_shape)
|
||||
|
||||
|
|
@ -128,19 +133,40 @@ def make_divisible(x, divisor):
|
|||
return math.ceil(x / divisor) * divisor
|
||||
|
||||
|
||||
def nms_rotated(boxes, scores, threshold=0.45):
|
||||
"""
|
||||
NMS for oriented bounding boxes (OBBs), powered by probiou and fast-nms.

Args:
boxes (torch.Tensor): (N, 5), xywhr.
scores (torch.Tensor): (N, ).
threshold (float): IoU threshold.

Returns:
(torch.Tensor): Indices of the boxes to keep after NMS.
"""
|
||||
if len(boxes) == 0:
|
||||
return np.empty((0, ), dtype=np.int8)
|
||||
sorted_idx = torch.argsort(scores, descending=True)
|
||||
boxes = boxes[sorted_idx]
|
||||
ious = batch_probiou(boxes, boxes).triu_(diagonal=1)
|
||||
pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
|
||||
return sorted_idx[pick]
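
# Illustrative sketch (not part of this diff): nms_rotated keeps the higher-scoring of two heavily
# overlapping rotated boxes and leaves a well-separated box untouched. Values invented for illustration.
import torch
from ultralytics.utils.ops import nms_rotated

boxes = torch.tensor([[100.0, 100.0, 60.0, 30.0, 0.2],
                      [101.0, 100.0, 60.0, 30.0, 0.2],   # near-duplicate of the first box
                      [300.0, 300.0, 40.0, 40.0, 0.0]])  # xywhr, angles in radians
scores = torch.tensor([0.9, 0.8, 0.7])
print(nms_rotated(boxes, scores, threshold=0.45))  # expected to keep indices 0 and 2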
|
||||
|
||||
|
||||
def non_max_suppression(
|
||||
prediction,
|
||||
conf_thres=0.25,
|
||||
iou_thres=0.45,
|
||||
classes=None,
|
||||
agnostic=False,
|
||||
multi_label=False,
|
||||
labels=(),
|
||||
max_det=300,
|
||||
nc=0, # number of classes (optional)
|
||||
max_time_img=0.05,
|
||||
max_nms=30000,
|
||||
max_wh=7680,
|
||||
prediction,
|
||||
conf_thres=0.25,
|
||||
iou_thres=0.45,
|
||||
classes=None,
|
||||
agnostic=False,
|
||||
multi_label=False,
|
||||
labels=(),
|
||||
max_det=300,
|
||||
nc=0, # number of classes (optional)
|
||||
max_time_img=0.05,
|
||||
max_nms=30000,
|
||||
max_wh=7680,
|
||||
rotated=False,
|
||||
):
|
||||
"""
|
||||
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
|
||||
|
|
@ -190,7 +216,8 @@ def non_max_suppression(
|
|||
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
||||
|
||||
prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
|
||||
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
|
||||
if not rotated:
|
||||
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
|
||||
|
||||
t = time.time()
|
||||
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
|
||||
|
|
@ -200,7 +227,7 @@ def non_max_suppression(
|
|||
x = x[xc[xi]] # confidence
|
||||
|
||||
# Cat apriori labels if autolabelling
|
||||
if labels and len(labels[xi]):
|
||||
if labels and len(labels[xi]) and not rotated:
|
||||
lb = labels[xi]
|
||||
v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
|
||||
v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
|
||||
|
|
@ -234,8 +261,13 @@ def non_max_suppression(
|
|||
|
||||
# Batched NMS
|
||||
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
||||
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
||||
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
||||
scores = x[:, 4] # scores
|
||||
if rotated:
|
||||
boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -2:-1]), dim=-1) # xywhr
|
||||
i = nms_rotated(boxes, scores, iou_thres)
|
||||
else:
|
||||
boxes = x[:, :4] + c # boxes (offset by class)
|
||||
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
||||
i = i[:max_det] # limit detections
|
||||
|
||||
# # Experimental
|
||||
|
|
@ -320,7 +352,7 @@ def scale_image(masks, im0_shape, ratio_pad=None):
|
|||
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
|
||||
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
|
||||
else:
|
||||
gain = ratio_pad[0][0]
|
||||
# gain = ratio_pad[0][0]
|
||||
pad = ratio_pad[1]
|
||||
top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) # y, x
|
||||
bottom, right = (int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)))
|
||||
|
|
@ -476,7 +508,8 @@ def ltwh2xywh(x):
|
|||
|
||||
def xyxyxyxy2xywhr(corners):
|
||||
"""
|
||||
Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation].
|
||||
Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. Rotation values are
returned in radians from 0 to pi/2.
|
||||
|
||||
Args:
|
||||
corners (numpy.ndarray | torch.Tensor): Input corners of shape (n, 8).
|
||||
|
|
@ -484,61 +517,46 @@ def xyxyxyxy2xywhr(corners):
|
|||
Returns:
|
||||
(numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5).
|
||||
"""
|
||||
is_numpy = isinstance(corners, np.ndarray)
|
||||
atan2, sqrt = (np.arctan2, np.sqrt) if is_numpy else (torch.atan2, torch.sqrt)
|
||||
|
||||
x1, y1, x2, y2, x3, y3, x4, y4 = corners.T
|
||||
cx = (x1 + x3) / 2
|
||||
cy = (y1 + y3) / 2
|
||||
dx21 = x2 - x1
|
||||
dy21 = y2 - y1
|
||||
|
||||
w = sqrt(dx21 ** 2 + dy21 ** 2)
|
||||
h = sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2)
|
||||
|
||||
rotation = atan2(-dy21, dx21)
|
||||
rotation *= 180.0 / math.pi # radians to degrees
|
||||
|
||||
return np.vstack((cx, cy, w, h, rotation)).T if is_numpy else torch.stack((cx, cy, w, h, rotation), dim=1)
|
||||
is_torch = isinstance(corners, torch.Tensor)
|
||||
points = corners.cpu().numpy() if is_torch else corners
|
||||
points = points.reshape(len(corners), -1, 2)
|
||||
rboxes = []
|
||||
for pts in points:
|
||||
# NOTE: Use cv2.minAreaRect to get accurate xywhr,
|
||||
# especially some objects are cut off by augmentations in dataloader.
|
||||
(x, y), (w, h), angle = cv2.minAreaRect(pts)
|
||||
rboxes.append([x, y, w, h, angle / 180 * np.pi])
|
||||
rboxes = torch.tensor(rboxes, device=corners.device, dtype=corners.dtype) if is_torch else np.asarray(
|
||||
rboxes, dtype=points.dtype)
|
||||
return rboxes
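
# Illustrative sketch (not part of this diff): converting the four corner points of an axis-aligned
# 40x20 rectangle recovers its centre and size; cv2.minAreaRect decides whether that comes back as
# (40, 20) with angle ~0 or (20, 40) with angle ~pi/2. Values invented for illustration.
import numpy as np
from ultralytics.utils import ops

corners = np.array([[80.0, 90.0, 120.0, 90.0, 120.0, 110.0, 80.0, 110.0]], dtype=np.float32)
print(ops.xyxyxyxy2xywhr(corners))  # approximately [[100, 100, 40, 20, 0]] up to the w/h-angle swap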
|
||||
|
||||
|
||||
def xywhr2xyxyxyxy(center):
|
||||
"""
|
||||
Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4].
|
||||
Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should
be in radians from 0 to pi/2.
|
||||
|
||||
Args:
|
||||
center (numpy.ndarray | torch.Tensor): Input data in [cx, cy, w, h, rotation] format of shape (n, 5).
|
||||
center (numpy.ndarray | torch.Tensor): Input data in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 8).
|
||||
(numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2).
|
||||
"""
|
||||
is_numpy = isinstance(center, np.ndarray)
|
||||
cos, sin = (np.cos, np.sin) if is_numpy else (torch.cos, torch.sin)
|
||||
|
||||
cx, cy, w, h, rotation = center.T
|
||||
rotation *= math.pi / 180.0 # degrees to radians
|
||||
|
||||
dx = w / 2
|
||||
dy = h / 2
|
||||
|
||||
cos_rot = cos(rotation)
|
||||
sin_rot = sin(rotation)
|
||||
dx_cos_rot = dx * cos_rot
|
||||
dx_sin_rot = dx * sin_rot
|
||||
dy_cos_rot = dy * cos_rot
|
||||
dy_sin_rot = dy * sin_rot
|
||||
|
||||
x1 = cx - dx_cos_rot - dy_sin_rot
|
||||
y1 = cy + dx_sin_rot - dy_cos_rot
|
||||
x2 = cx + dx_cos_rot - dy_sin_rot
|
||||
y2 = cy - dx_sin_rot - dy_cos_rot
|
||||
x3 = cx + dx_cos_rot + dy_sin_rot
|
||||
y3 = cy - dx_sin_rot + dy_cos_rot
|
||||
x4 = cx - dx_cos_rot + dy_sin_rot
|
||||
y4 = cy + dx_sin_rot + dy_cos_rot
|
||||
|
||||
return np.vstack((x1, y1, x2, y2, x3, y3, x4, y4)).T if is_numpy else torch.stack(
|
||||
(x1, y1, x2, y2, x3, y3, x4, y4), dim=1)
|
||||
ctr = center[..., :2]
|
||||
w, h, angle = (center[..., i:i + 1] for i in range(2, 5))
|
||||
cos_value, sin_value = cos(angle), sin(angle)
|
||||
vec1 = [w / 2 * cos_value, w / 2 * sin_value]
|
||||
vec2 = [-h / 2 * sin_value, h / 2 * cos_value]
|
||||
vec1 = np.concatenate(vec1, axis=-1) if is_numpy else torch.cat(vec1, dim=-1)
|
||||
vec2 = np.concatenate(vec2, axis=-1) if is_numpy else torch.cat(vec2, dim=-1)
|
||||
pt1 = ctr + vec1 + vec2
|
||||
pt2 = ctr + vec1 - vec2
|
||||
pt3 = ctr - vec1 - vec2
|
||||
pt4 = ctr - vec1 + vec2
|
||||
return np.stack([pt1, pt2, pt3, pt4], axis=-2) if is_numpy else torch.stack([pt1, pt2, pt3, pt4], dim=-2)
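
# Illustrative sketch (not part of this diff): round-tripping a rotated box through xywhr2xyxyxyxy and
# xyxyxyxy2xywhr recovers the same centre and size (w/h and angle may come back swapped by pi/2).
# Values invented for illustration.
import torch
from ultralytics.utils import ops

rbox = torch.tensor([[100.0, 100.0, 40.0, 20.0, 0.3]])  # xywhr, angle in radians
corners = ops.xywhr2xyxyxyxy(rbox)  # (1, 4, 2) corner points
print(ops.xyxyxyxy2xywhr(corners.view(1, 8)))  # centre and size match the input rbox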
|
||||
|
||||
|
||||
def ltwh2xyxy(x):
@ -100,25 +100,35 @@ class Annotator:
|
|||
self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
|
||||
self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
|
||||
|
||||
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
|
||||
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), rotated=False):
|
||||
"""Add one xyxy box to image with label."""
|
||||
if isinstance(box, torch.Tensor):
|
||||
box = box.tolist()
|
||||
if self.pil or not is_ascii(label):
|
||||
self.draw.rectangle(box, width=self.lw, outline=color) # box
|
||||
if rotated:
|
||||
p1 = box[0]
|
||||
# NOTE: PIL-version polygon needs tuple type.
|
||||
self.draw.polygon([tuple(b) for b in box], width=self.lw, outline=color)
|
||||
else:
|
||||
p1 = (box[0], box[1])
|
||||
self.draw.rectangle(box, width=self.lw, outline=color) # box
|
||||
if label:
|
||||
w, h = self.font.getsize(label) # text width, height
|
||||
outside = box[1] - h >= 0 # label fits outside box
|
||||
outside = p1[1] - h >= 0 # label fits outside box
|
||||
self.draw.rectangle(
|
||||
(box[0], box[1] - h if outside else box[1], box[0] + w + 1,
|
||||
box[1] + 1 if outside else box[1] + h + 1),
|
||||
(p1[0], p1[1] - h if outside else p1[1], p1[0] + w + 1, p1[1] + 1 if outside else p1[1] + h + 1),
|
||||
fill=color,
|
||||
)
|
||||
# self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0
|
||||
self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
|
||||
self.draw.text((p1[0], p1[1] - h if outside else p1[1]), label, fill=txt_color, font=self.font)
|
||||
else: # cv2
|
||||
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
|
||||
cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
|
||||
if rotated:
|
||||
p1 = [int(b) for b in box[0]]
|
||||
# NOTE: cv2-version polylines needs np.asarray type.
|
||||
cv2.polylines(self.im, [np.asarray(box, dtype=np.int32)], True, color, self.lw)
|
||||
else:
|
||||
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
|
||||
cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
|
||||
if label:
|
||||
w, h = cv2.getTextSize(label, 0, fontScale=self.sf, thickness=self.tf)[0] # text width, height
|
||||
outside = p1[1] - h >= 3
|
||||
|
|
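With the rotated branch in place, box_label draws an oriented box when it receives the four corner points rather than an xyxy pair. A small usage sketch, assuming the 8.0.x module layout (ultralytics.utils.plotting for Annotator and colors, ultralytics.utils.ops for the converter); the box and label values are illustrative:

import numpy as np
from ultralytics.utils import ops
from ultralytics.utils.plotting import Annotator, colors

# Draw one oriented box on a blank image using the new rotated=True path.
im = np.zeros((320, 320, 3), dtype=np.uint8)
annotator = Annotator(im, line_width=2)

xywhr = np.array([[160.0, 160.0, 120.0, 40.0, 0.5]])  # cx, cy, w, h, rotation (radians)
corners = ops.xywhr2xyxyxyxy(xywhr)[0]                # (4, 2) corner points
annotator.box_label(corners, label='plane 0.91', color=colors(0), rotated=True)
result = annotator.result()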
@ -563,6 +573,7 @@ def plot_images(images,
batch_idx,
cls,
bboxes=np.zeros(0, dtype=np.float32),
confs=None,
masks=np.zeros(0, dtype=np.uint8),
kpts=np.zeros((0, 51), dtype=np.float32),
paths=None,
@ -618,27 +629,29 @@ def plot_images(images,
if len(cls) > 0:
idx = batch_idx == i
classes = cls[idx].astype('int')
labels = confs is None

if len(bboxes):
boxes = ops.xywh2xyxy(bboxes[idx, :4]).T
labels = bboxes.shape[1] == 4 # labels if no conf column
conf = None if labels else bboxes[idx, 4] # check for confidence presence (label vs pred)

if boxes.shape[1]:
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
boxes[[0, 2]] *= w # scale to pixels
boxes[[1, 3]] *= h
boxes = bboxes[idx]
conf = confs[idx] if confs is not None else None # check for confidence presence (label vs pred)
if len(boxes):
if boxes[:, :4].max() <= 1.1: # if normalized with tolerance 0.1
boxes[:, [0, 2]] *= w # scale to pixels
boxes[:, [1, 3]] *= h
elif scale < 1: # absolute coords need scale if image scales
boxes *= scale
boxes[[0, 2]] += x
boxes[[1, 3]] += y
for j, box in enumerate(boxes.T.tolist()):
boxes[:, :4] *= scale
boxes[:, 0] += x
boxes[:, 1] += y
is_obb = boxes.shape[-1] == 5 # xywhr
boxes = ops.xywhr2xyxyxyxy(boxes) if is_obb else ops.xywh2xyxy(boxes)
for j, box in enumerate(boxes.astype(np.int64).tolist()):
c = classes[j]
color = colors(c)
c = names.get(c, c) if names else c
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
annotator.box_label(box, label, color=color)
annotator.box_label(box, label, color=color, rotated=is_obb)

elif len(classes):
for c in classes:
color = colors(c)
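The reworked loop above treats labels and predictions uniformly: boxes arrive as (n, 4) xywh or (n, 5) xywhr, are de-normalized only when their values look normalized, and are converted to corner form just before drawing. A condensed sketch of that branch (the helper name to_pixel_corners is illustrative):

import numpy as np
from ultralytics.utils import ops

def to_pixel_corners(boxes, w, h):
    """Sketch: accept (n, 4) xywh or (n, 5) xywhr boxes, de-normalize if needed, return corner coordinates."""
    boxes = boxes.copy()
    if len(boxes) and boxes[:, :4].max() <= 1.1:  # looks normalized, with tolerance 0.1
        boxes[:, [0, 2]] *= w
        boxes[:, [1, 3]] *= h
    is_obb = boxes.shape[-1] == 5  # xywhr carries a 5th rotation column
    return ops.xywhr2xyxyxyxy(boxes) if is_obb else ops.xywh2xyxy(boxes)

print(to_pixel_corners(np.array([[0.5, 0.5, 0.25, 0.125, 0.3]], dtype=np.float32), w=640, h=640).shape)  # (1, 4, 2)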
@ -847,7 +860,18 @@ def output_to_target(output, max_det=300):
j = torch.full((conf.shape[0], 1), i)
targets.append(torch.cat((j, cls, ops.xyxy2xywh(box), conf), 1))
targets = torch.cat(targets, 0).numpy()
return targets[:, 0], targets[:, 1], targets[:, 2:]
return targets[:, 0], targets[:, 1], targets[:, 2:-1], targets[:, -1]


def output_to_rotated_target(output, max_det=300):
"""Convert model output to target format [batch_id, class_id, x, y, w, h, rotation, conf] for plotting."""
targets = []
for i, o in enumerate(output):
box, conf, cls, angle = o[:max_det].cpu().split((4, 1, 1, 1), 1)
j = torch.full((conf.shape[0], 1), i)
targets.append(torch.cat((j, cls, box, angle, conf), 1))
targets = torch.cat(targets, 0).numpy()
return targets[:, 0], targets[:, 1], targets[:, 2:-1], targets[:, -1]


def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
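As the (4, 1, 1, 1) split above implies, output_to_rotated_target expects each prediction row to be laid out as [x, y, w, h, conf, cls, angle]. A tiny sketch with a fake prediction tensor, showing the repacked row format used for plotting:

import torch

o = torch.rand(3, 7)  # 3 fake predictions: x, y, w, h, conf, cls, angle
box, conf, cls, angle = o.split((4, 1, 1, 1), 1)
row = torch.cat((torch.full((conf.shape[0], 1), 0.0), cls, box, angle, conf), 1)  # batch index 0
print(row.shape)  # torch.Size([3, 8]) -> batch_id, class_id, x, y, w, h, rotation, conf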
@ -4,59 +4,12 @@ import torch
import torch.nn as nn

from .checks import check_version
from .metrics import bbox_iou
from .metrics import bbox_iou, probiou
from .ops import xywhr2xyxyxyxy

TORCH_1_10 = check_version(torch.__version__, '1.10.0')


def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
"""
Select the positive anchor center in gt.

Args:
xy_centers (Tensor): shape(h*w, 2)
gt_bboxes (Tensor): shape(b, n_boxes, 4)

Returns:
(Tensor): shape(b, n_boxes, h*w)
"""
n_anchors = xy_centers.shape[0]
bs, n_boxes, _ = gt_bboxes.shape
lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom
bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1)
# return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype)
return bbox_deltas.amin(3).gt_(eps)


def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
"""
If an anchor box is assigned to multiple gts, the one with the highest IoU will be selected.

Args:
mask_pos (Tensor): shape(b, n_max_boxes, h*w)
overlaps (Tensor): shape(b, n_max_boxes, h*w)

Returns:
target_gt_idx (Tensor): shape(b, h*w)
fg_mask (Tensor): shape(b, h*w)
mask_pos (Tensor): shape(b, n_max_boxes, h*w)
"""
# (b, n_max_boxes, h*w) -> (b, h*w)
fg_mask = mask_pos.sum(-2)
if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes
mask_multi_gts = (fg_mask.unsqueeze(1) > 1).expand(-1, n_max_boxes, -1) # (b, n_max_boxes, h*w)
max_overlaps_idx = overlaps.argmax(1) # (b, h*w)

is_max_overlaps = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)
is_max_overlaps.scatter_(1, max_overlaps_idx.unsqueeze(1), 1)

mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos).float() # (b, n_max_boxes, h*w)
fg_mask = mask_pos.sum(-2)
# Find which gt each grid cell serves (index)
target_gt_idx = mask_pos.argmax(-2) # (b, h*w)
return target_gt_idx, fg_mask, mask_pos


class TaskAlignedAssigner(nn.Module):
"""
A task-aligned assigner for object detection.
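select_candidates_in_gts reduces to a simple containment test: compute the (left, top, right, bottom) margins of every anchor center against every ground-truth xyxy box and require all four to be positive. A tiny worked example with three anchors and two boxes (values illustrative):

import torch

xy_centers = torch.tensor([[1.0, 1.0], [5.0, 5.0], [9.0, 9.0]])                 # (h*w, 2) = (3, 2)
gt_bboxes = torch.tensor([[[0.0, 0.0, 4.0, 4.0], [4.0, 4.0, 10.0, 10.0]]])      # (b, n_boxes, 4) = (1, 2, 4)

lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2)  # left-top, right-bottom
deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(1, 2, 3, -1)
print(deltas.amin(3).gt_(1e-9))  # tensor([[[1., 0., 0.], [0., 1., 1.]]]) -> anchor-inside-gt mask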
@ -115,7 +68,7 @@ class TaskAlignedAssigner:
mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
mask_gt)

target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)
target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

# Assigned target
target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)
@ -131,7 +84,7 @@ class TaskAlignedAssigner:

def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
"""Get in_gts mask, (b, max_num_obj, h*w)."""
mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
mask_in_gts = self.select_candidates_in_gts(anc_points, gt_bboxes)
# Get anchor_align metric, (b, max_num_obj, h*w)
align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt)
# Get topk_metric mask, (b, max_num_obj, h*w)
@ -157,11 +110,15 @@ class TaskAlignedAssigner:
# (b, max_num_obj, 1, 4), (b, 1, h*w, 4)
pd_boxes = pd_bboxes.unsqueeze(1).expand(-1, self.n_max_boxes, -1, -1)[mask_gt]
gt_boxes = gt_bboxes.unsqueeze(2).expand(-1, -1, na, -1)[mask_gt]
overlaps[mask_gt] = bbox_iou(gt_boxes, pd_boxes, xywh=False, CIoU=True).squeeze(-1).clamp_(0)
overlaps[mask_gt] = self.iou_calculation(gt_boxes, pd_boxes)

align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
return align_metric, overlaps

def iou_calculation(self, gt_bboxes, pd_bboxes):
"""IoU calculation for horizontal bounding boxes."""
return bbox_iou(gt_bboxes, pd_bboxes, xywh=False, CIoU=True).squeeze(-1).clamp_(0)

def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
"""
Select the top-k candidates based on the given metrics.
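Factoring the overlap computation into an iou_calculation method is what lets the rotated assigner further down reuse the rest of the pipeline unchanged: the base class computes CIoU on xyxy boxes, and the subclass swaps in probiou on xywhr boxes. A short sketch of the base behaviour on two matched gt/prediction pairs (box values illustrative):

import torch
from ultralytics.utils.metrics import bbox_iou

gt = torch.tensor([[0.0, 0.0, 10.0, 10.0], [2.0, 2.0, 6.0, 6.0]])
pd = torch.tensor([[1.0, 1.0, 10.0, 10.0], [2.0, 2.0, 7.0, 7.0]])
iou = bbox_iou(gt, pd, xywh=False, CIoU=True).squeeze(-1).clamp_(0)
print(iou)  # one CIoU value per pair; RotatedTaskAlignedAssigner replaces this with probiou on xywhr boxes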
@ -229,7 +186,7 @@ class TaskAlignedAssigner:
target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w)

# Assigned target boxes, (b, max_num_obj, 4) -> (b, h*w, 4)
target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]
target_bboxes = gt_bboxes.view(-1, gt_bboxes.shape[-1])[target_gt_idx]

# Assigned target scores
target_labels.clamp_(0)
@ -245,6 +202,89 @@ class TaskAlignedAssigner:

return target_labels, target_bboxes, target_scores

@staticmethod
def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
"""
Select the positive anchor center in gt.

Args:
xy_centers (Tensor): shape(h*w, 2)
gt_bboxes (Tensor): shape(b, n_boxes, 4)

Returns:
(Tensor): shape(b, n_boxes, h*w)
"""
n_anchors = xy_centers.shape[0]
bs, n_boxes, _ = gt_bboxes.shape
lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom
bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1)
# return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype)
return bbox_deltas.amin(3).gt_(eps)

@staticmethod
def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
"""
If an anchor box is assigned to multiple gts, the one with the highest IoU will be selected.

Args:
mask_pos (Tensor): shape(b, n_max_boxes, h*w)
overlaps (Tensor): shape(b, n_max_boxes, h*w)

Returns:
target_gt_idx (Tensor): shape(b, h*w)
fg_mask (Tensor): shape(b, h*w)
mask_pos (Tensor): shape(b, n_max_boxes, h*w)
"""
# (b, n_max_boxes, h*w) -> (b, h*w)
fg_mask = mask_pos.sum(-2)
if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes
mask_multi_gts = (fg_mask.unsqueeze(1) > 1).expand(-1, n_max_boxes, -1) # (b, n_max_boxes, h*w)
max_overlaps_idx = overlaps.argmax(1) # (b, h*w)

is_max_overlaps = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)
is_max_overlaps.scatter_(1, max_overlaps_idx.unsqueeze(1), 1)

mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos).float() # (b, n_max_boxes, h*w)
fg_mask = mask_pos.sum(-2)
# Find which gt each grid cell serves (index)
target_gt_idx = mask_pos.argmax(-2) # (b, h*w)
return target_gt_idx, fg_mask, mask_pos


class RotatedTaskAlignedAssigner(TaskAlignedAssigner):

def iou_calculation(self, gt_bboxes, pd_bboxes):
"""IoU calculation for rotated bounding boxes."""
return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)

@staticmethod
def select_candidates_in_gts(xy_centers, gt_bboxes):
"""
Select the positive anchor center in gt for rotated bounding boxes.

Args:
xy_centers (Tensor): shape(h*w, 2)
gt_bboxes (Tensor): shape(b, n_boxes, 5)

Returns:
(Tensor): shape(b, n_boxes, h*w)
"""
# (b, n_boxes, 5) --> (b, n_boxes, 4, 2)
corners = xywhr2xyxyxyxy(gt_bboxes)
# (b, n_boxes, 1, 2)
a, b, _, d = corners.split(1, dim=-2)
ab = b - a
ad = d - a

# (b, n_boxes, h*w, 2)
ap = xy_centers - a
norm_ab = (ab * ab).sum(dim=-1)
norm_ad = (ad * ad).sum(dim=-1)
ap_dot_ab = (ap * ab).sum(dim=-1)
ap_dot_ad = (ap * ad).sum(dim=-1)
is_in_box = (ap_dot_ab >= 0) & (ap_dot_ab <= norm_ab) & (ap_dot_ad >= 0) & (ap_dot_ad <= norm_ad)
return is_in_box


def make_anchors(feats, strides, grid_cell_offset=0.5):
"""Generate anchors from features."""
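The rotated select_candidates_in_gts replaces the axis-aligned margin test with a projection test: with corners A, B, C, D, a center P lies inside the rectangle when AP·AB falls in [0, |AB|²] and AP·AD falls in [0, |AD|²]. A self-contained sketch of that check for a single point (the helper name is illustrative):

import torch

def point_in_rotated_box(p, corners):
    """Sketch: check whether point p (2,) lies inside the rectangle with corners (4, 2) ordered A, B, C, D."""
    a, b, d = corners[0], corners[1], corners[3]
    ab, ad, ap = b - a, d - a, p - a
    ok_ab = (ap @ ab >= 0) & (ap @ ab <= ab @ ab)  # projection onto AB stays within the side
    ok_ad = (ap @ ad >= 0) & (ap @ ad <= ad @ ad)  # projection onto AD stays within the side
    return bool(ok_ab & ok_ad)

square = torch.tensor([[0.0, 0.0], [4.0, 0.0], [4.0, 2.0], [0.0, 2.0]])  # unrotated 4x2 rectangle
print(point_in_rotated_box(torch.tensor([1.0, 1.0]), square))  # True
print(point_in_rotated_box(torch.tensor([5.0, 1.0]), square))  # False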
@ -277,3 +317,23 @@ def bbox2dist(anchor_points, bbox, reg_max):
"""Transform bbox(xyxy) to dist(ltrb)."""
x1y1, x2y2 = bbox.chunk(2, -1)
return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp_(0, reg_max - 0.01) # dist (lt, rb)


def dist2rbox(pred_dist, pred_angle, anchor_points, dim=-1):
"""
Decode predicted rotated bounding box coordinates from anchor points and distribution.

Args:
pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).
anchor_points (torch.Tensor): Anchor points, (h*w, 2).

Returns:
(torch.Tensor): Predicted rotated bounding boxes, (bs, h*w, 4).
"""
lt, rb = pred_dist.split(2, dim=dim)
cos, sin = torch.cos(pred_angle), torch.sin(pred_angle)
# (bs, h*w, 1)
xf, yf = ((rb - lt) / 2).split(1, dim=dim)
x, y = xf * cos - yf * sin, xf * sin + yf * cos
xy = torch.cat([x, y], dim=dim) + anchor_points
return torch.cat([xy, lt + rb], dim=dim)
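dist2rbox mirrors dist2bbox: the (l, t, r, b) distances give the box size directly as l+r and t+b, while the center offset ((r-l)/2, (b-t)/2) is rotated by the predicted angle before being added to the anchor point. A quick decode of one box, assuming the module path shown in the diff (ultralytics/utils/tal.py):

import torch
from ultralytics.utils.tal import dist2rbox

pred_dist = torch.tensor([[[2.0, 1.0, 4.0, 3.0]]])     # (bs, h*w, 4) left, top, right, bottom
pred_angle = torch.tensor([[[0.0]]])                   # (bs, h*w, 1) radians
anchor_points = torch.tensor([[8.0, 8.0]])             # (h*w, 2)
print(dist2rbox(pred_dist, pred_angle, anchor_points))  # tensor([[[9., 9., 6., 4.]]]) -> cx, cy, w, h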