ultralytics 8.0.235 YOLOv8 OBB train, val, predict and export (#4499)
Co-authored-by: Yash Khurana <ykhurana6@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Swamita Gupta <swamita2001@gmail.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Laughing-q <1182102784@qq.com>
parent f702b34a50, commit 072291bc78
52 changed files with 2090 additions and 524 deletions
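The headline change is a new `obb` task wired through training, validation, prediction and export. A minimal usage sketch of the public API (the `yolov8n-obb.pt` weights and the `dota8.yaml` sample dataset config are assumptions about what ships alongside this release):

```python
from ultralytics import YOLO

# Oriented bounding boxes end to end with the new 'obb' task.
model = YOLO('yolov8n-obb.pt')                       # assumed OBB checkpoint name
model.train(data='dota8.yaml', epochs=3, imgsz=640)  # assumed small DOTA-style dataset config
model.val()                                          # runs the new OBBValidator
results = model.predict('image.jpg')                 # placeholder image path
model.export(format='onnx')                          # export covers the OBB head as well
```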
ultralytics/models/rtdetr/val.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from pathlib import Path
import torch
from ultralytics.data import YOLODataset
@@ -22,7 +20,7 @@ class RTDETRDataset(YOLODataset):
    def __init__(self, *args, data=None, **kwargs):
        """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
        super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
        super().__init__(*args, data=data, **kwargs)

    # NOTE: add stretch version load_image for RTDETR mosaic
    def load_image(self, i, rect_mode=False):
@@ -108,47 +106,22 @@ class RTDETRValidator(DetectionValidator):
        return outputs

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1
    def _prepare_batch(self, si, batch):
        idx = batch['batch_idx'] == si
        cls = batch['cls'][idx].squeeze(-1)
        bbox = batch['bboxes'][idx]
        ori_shape = batch['ori_shape'][si]
        imgsz = batch['img'].shape[2:]
        ratio_pad = batch['ratio_pad'][si]
        if len(cls):
            bbox = ops.xywh2xyxy(bbox)  # target boxes
            bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
            bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
        prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
        return prepared_batch

            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, *torch.zeros((2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            predn[..., [0, 2]] *= shape[1] / self.args.imgsz  # native-space pred
            predn[..., [1, 3]] *= shape[0] / self.args.imgsz  # native-space pred

            # Evaluate
            if nl:
                tbox = ops.xywh2xyxy(bbox)  # target boxes
                tbox[..., [0, 2]] *= shape[1]  # native-space pred
                tbox[..., [1, 3]] *= shape[0]  # native-space pred
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                # NOTE: To get correct metrics, the inputs of `_process_batch` should always be float32 type.
                correct_bboxes = self._process_batch(predn.float(), labelsn)
            # TODO: maybe remove these `self.` arguments as they already are member variable
            if self.args.plots:
                self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))  # (conf, pcls, tcls)

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            if self.args.save_txt:
                file = self.save_dir / 'labels' / f'{Path(batch["im_file"][si]).stem}.txt'
                self.save_one_txt(predn, self.args.save_conf, shape, file)
    def _prepare_pred(self, pred, pbatch):
        predn = pred.clone()
        predn[..., [0, 2]] *= pbatch['ori_shape'][1] / self.args.imgsz  # native-space pred
        predn[..., [1, 3]] *= pbatch['ori_shape'][0] / self.args.imgsz  # native-space pred
        return predn.float()
ultralytics/models/yolo/__init__.py
@@ -1,7 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from ultralytics.models.yolo import classify, detect, pose, segment
from ultralytics.models.yolo import classify, detect, obb, pose, segment

from .model import YOLO

__all__ = 'classify', 'segment', 'detect', 'pose', 'YOLO'
__all__ = 'classify', 'segment', 'detect', 'pose', 'obb', 'YOLO'
ultralytics/models/yolo/detect/train.py
@@ -1,8 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import math
import random
from copy import copy

import numpy as np
import torch.nn as nn

from ultralytics.data import build_dataloader, build_yolo_dataset
from ultralytics.engine.trainer import BaseTrainer
@@ -54,6 +57,16 @@ class DetectionTrainer(BaseTrainer):
    def preprocess_batch(self, batch):
        """Preprocesses a batch of images by scaling and converting to float."""
        batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
        if self.args.multi_scale:
            imgs = batch['img']
            sz = (random.randrange(self.args.imgsz * 0.5, self.args.imgsz * 1.5 + self.stride) // self.stride *
                  self.stride)  # size
            sf = sz / max(imgs.shape[2:])  # scale factor
            if sf != 1:
                ns = [math.ceil(x * sf / self.stride) * self.stride
                      for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
            batch['img'] = imgs
        return batch

    def set_model_attributes(self):
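The new `multi_scale` branch in `preprocess_batch` picks a random training size between 0.5x and 1.5x of `imgsz`, snapped to the stride grid, and bilinearly resizes the batch whenever the scale factor is not 1. A standalone sketch of just the size sampling (hypothetical helper, not part of the diff):

```python
import math
import random

def sample_multiscale_shape(imgsz=640, stride=32, shape=(640, 640)):
    """Pick a random size in [0.5 * imgsz, 1.5 * imgsz] and snap each side to a stride multiple."""
    sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + stride)) // stride * stride
    sf = sz / max(shape)  # scale factor relative to the longest side
    return [math.ceil(x * sf / stride) * stride for x in shape]  # new (h, w), stretched to stride multiples

print(sample_multiscale_shape())  # e.g. [384, 384] or [928, 928]
```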
ultralytics/models/yolo/detect/val.py
@@ -70,7 +70,7 @@ class DetectionValidator(BaseValidator):
        self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
        self.seen = 0
        self.jdict = []
        self.stats = []
        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[])

    def get_desc(self):
        """Return a formatted string summarizing class metrics of YOLO model."""
@@ -86,51 +86,68 @@ class DetectionValidator(BaseValidator):
                                       agnostic=self.args.single_cls,
                                       max_det=self.args.max_det)

    def _prepare_batch(self, si, batch):
        idx = batch['batch_idx'] == si
        cls = batch['cls'][idx].squeeze(-1)
        bbox = batch['bboxes'][idx]
        ori_shape = batch['ori_shape'][si]
        imgsz = batch['img'].shape[2:]
        ratio_pad = batch['ratio_pad'][si]
        if len(cls):
            bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
        prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
        return prepared_batch

    def _prepare_pred(self, pred, pbatch):
        predn = pred.clone()
        ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'],
                        ratio_pad=pbatch['ratio_pad'])  # native-space pred
        return predn

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            npr = len(pred)
            stat = dict(conf=torch.zeros(0, device=self.device),
                        pred_cls=torch.zeros(0, device=self.device),
                        tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
            nl = len(cls)
            stat['target_cls'] = cls
            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, *torch.zeros((2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    # TODO: obb has not supported confusion_matrix yet.
                    if self.args.plots and self.args.task != 'obb':
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            predn = self._prepare_pred(pred, pbatch)
            stat['conf'] = predn[:, 4]
            stat['pred_cls'] = predn[:, 5]

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))  # (conf, pcls, tcls)
                stat['tp'] = self._process_batch(predn, bbox, cls)
                # TODO: obb has not supported confusion_matrix yet.
                if self.args.plots and self.args.task != 'obb':
                    self.confusion_matrix.process_batch(predn, bbox, cls)
            for k in self.stats.keys():
                self.stats[k].append(stat[k])

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            if self.args.save_txt:
                file = self.save_dir / 'labels' / f'{Path(batch["im_file"][si]).stem}.txt'
                self.save_one_txt(predn, self.args.save_conf, shape, file)
                self.save_one_txt(predn, self.args.save_conf, pbatch['ori_shape'], file)

    def finalize_metrics(self, *args, **kwargs):
        """Set final values for metrics speed and confusion matrix."""
@@ -139,10 +156,11 @@ class DetectionValidator(BaseValidator):

    def get_stats(self):
        """Returns metrics statistics and results dictionary."""
        stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*self.stats)]  # to numpy
        if len(stats) and stats[0].any():
            self.metrics.process(*stats)
        self.nt_per_class = np.bincount(stats[-1].astype(int), minlength=self.nc)  # number of targets per class
        stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
        if len(stats) and stats['tp'].any():
            self.metrics.process(**stats)
        self.nt_per_class = np.bincount(stats['target_cls'].astype(int),
                                        minlength=self.nc)  # number of targets per class
        return self.metrics.results_dict
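All validators now accumulate per-image statistics into a dict of lists keyed by `tp`, `conf`, `pred_cls` and `target_cls` (plus task-specific keys such as `tp_m` or `tp_p`) instead of a list of tuples; `get_stats` concatenates each key and feeds the result to `metrics.process(**stats)`. A toy sketch of that accumulation pattern with made-up shapes (two predictions per image, ten IoU thresholds):

```python
import torch

stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[])
for _ in range(3):  # pretend three validation images
    stats['tp'].append(torch.zeros(2, 10, dtype=torch.bool))  # per-prediction correctness at 10 IoUs
    stats['conf'].append(torch.rand(2))
    stats['pred_cls'].append(torch.zeros(2))
    stats['target_cls'].append(torch.zeros(1))

merged = {k: torch.cat(v, 0).numpy() for k, v in stats.items()}  # same merge get_stats performs
print({k: v.shape for k, v in merged.items()})  # {'tp': (6, 10), 'conf': (6,), 'pred_cls': (6,), 'target_cls': (3,)}
```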
    def print_results(self):
@@ -165,7 +183,7 @@ class DetectionValidator(BaseValidator):
                                           normalize=normalize,
                                           on_plot=self.on_plot)

    def _process_batch(self, detections, labels):
    def _process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Return correct prediction matrix.

@@ -178,8 +196,8 @@ class DetectionValidator(BaseValidator):
        Returns:
            (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels.
        """
        iou = box_iou(labels[:, 1:], detections[:, :4])
        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
        iou = box_iou(gt_bboxes, detections[:, :4])
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def build_dataset(self, img_path, mode='val', batch=None):
        """
ultralytics/models/yolo/model.py
@@ -1,8 +1,8 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from ultralytics.engine.model import Model
from ultralytics.models import yolo  # noqa
from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel, SegmentationModel
from ultralytics.models import yolo
from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel


class YOLO(Model):
@@ -31,4 +31,9 @@ class YOLO(Model):
                'model': PoseModel,
                'trainer': yolo.pose.PoseTrainer,
                'validator': yolo.pose.PoseValidator,
                'predictor': yolo.pose.PosePredictor, }, }
                'predictor': yolo.pose.PosePredictor, },
            'obb': {
                'model': OBBModel,
                'trainer': yolo.obb.OBBTrainer,
                'validator': yolo.obb.OBBValidator,
                'predictor': yolo.obb.OBBPredictor, }, }
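The extra `'obb'` entry lets the generic model machinery resolve OBB-specific components from the task name alone. A rough illustration of how that mapping is consumed (simplified; the lookup itself lives in the `Model` base class, and the checkpoint name is an assumption):

```python
from ultralytics import YOLO

model = YOLO('yolov8n-obb.pt')  # assumed OBB checkpoint; task is inferred as 'obb'
print(model.task)               # 'obb'
# model.task_map['obb'] resolves to {'model': OBBModel, 'trainer': OBBTrainer,
#                                    'validator': OBBValidator, 'predictor': OBBPredictor}
```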
ultralytics/models/yolo/obb/__init__.py (new file)
@@ -0,0 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .predict import OBBPredictor
from .train import OBBTrainer
from .val import OBBValidator

__all__ = 'OBBPredictor', 'OBBTrainer', 'OBBValidator'
ultralytics/models/yolo/obb/predict.py (new file)
@@ -0,0 +1,51 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import torch

from ultralytics.engine.results import Results
from ultralytics.models.yolo.detect.predict import DetectionPredictor
from ultralytics.utils import DEFAULT_CFG, ops


class OBBPredictor(DetectionPredictor):
    """
    A class extending the DetectionPredictor class for prediction based on an Oriented Bounding Box (OBB) model.

    Example:
        ```python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.obb import OBBPredictor

        args = dict(model='yolov8n-obb.pt', source=ASSETS)
        predictor = OBBPredictor(overrides=args)
        predictor.predict_cli()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'obb'

    def postprocess(self, preds, img, orig_imgs):
        """Post-processes predictions and returns a list of Results objects."""
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det,
                                        nc=len(self.model.names),
                                        classes=self.args.classes,
                                        rotated=True)

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_imgs[i]
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape, xywh=True)
            img_path = self.batch[0][i]
            # xywh, r, conf, cls
            obb = torch.cat([pred[:, :4], pred[:, -1:], pred[:, 4:6]], dim=-1)
            results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb))
        return results
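After rotated NMS each prediction row ends with the angle, so `postprocess` reorders the columns into `(x, y, w, h, r, conf, cls)` before building `Results`. A hedged sketch of reading those rotated boxes back out (the `obb` accessor and its `xywhr` attribute are assumed from the Results API introduced alongside this task):

```python
from ultralytics import YOLO

results = YOLO('yolov8n-obb.pt')('image.jpg')  # assumed checkpoint name, placeholder image path
obb = results[0].obb                           # rotated-box container on the Results object
print(obb.xywhr.shape)                         # (n, 5): center x, center y, width, height, rotation
print(obb.conf, obb.cls)                       # per-box confidence and class index
```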
ultralytics/models/yolo/obb/train.py (new file)
@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from copy import copy

from ultralytics.models import yolo
from ultralytics.nn.tasks import OBBModel
from ultralytics.utils import DEFAULT_CFG, RANK


class OBBTrainer(yolo.detect.DetectionTrainer):
    """
    A class extending the DetectionTrainer class for training based on an Oriented Bounding Box (OBB) model.

    Example:
        ```python
        from ultralytics.models.yolo.obb import OBBTrainer

        args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3)
        trainer = OBBTrainer(overrides=args)
        trainer.train()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize a OBBTrainer object with given arguments."""
        if overrides is None:
            overrides = {}
        overrides['task'] = 'obb'
        super().__init__(cfg, overrides, _callbacks)

    def get_model(self, cfg=None, weights=None, verbose=True):
        """Return OBBModel initialized with specified config and weights."""
        model = OBBModel(cfg, ch=3, nc=self.data['nc'], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)

        return model

    def get_validator(self):
        """Return an instance of OBBValidator for validation of YOLO model."""
        self.loss_names = 'box_loss', 'cls_loss', 'dfl_loss'
        return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
ultralytics/models/yolo/obb/val.py (new file)
@@ -0,0 +1,187 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from pathlib import Path

import torch

from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.utils import LOGGER, ops
from ultralytics.utils.metrics import OBBMetrics, batch_probiou
from ultralytics.utils.plotting import output_to_rotated_target, plot_images


class OBBValidator(DetectionValidator):
    """
    A class extending the DetectionValidator class for validation based on an Oriented Bounding Box (OBB) model.

    Example:
        ```python
        from ultralytics.models.yolo.obb import OBBValidator

        args = dict(model='yolov8n-obb.pt', data='coco8-seg.yaml')
        validator = OBBValidator(args=args)
        validator(model=args['model'])
        ```
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """Initialize OBBValidator and set task to 'obb', metrics to OBBMetrics."""
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        self.args.task = 'obb'
        self.metrics = OBBMetrics(save_dir=self.save_dir, plot=True, on_plot=self.on_plot)

    def init_metrics(self, model):
        """Initialize evaluation metrics for YOLO."""
        super().init_metrics(model)
        val = self.data.get(self.args.split, '')  # validation path
        self.is_dota = isinstance(val, str) and 'DOTA' in val  # is COCO

    def postprocess(self, preds):
        """Apply Non-maximum suppression to prediction outputs."""
        return ops.non_max_suppression(preds,
                                       self.args.conf,
                                       self.args.iou,
                                       labels=self.lb,
                                       nc=self.nc,
                                       multi_label=True,
                                       agnostic=self.args.single_cls,
                                       max_det=self.args.max_det,
                                       rotated=True)

    def _process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Return correct prediction matrix.

        Args:
            detections (torch.Tensor): Tensor of shape [N, 6] representing detections.
                Each detection is of the format: x1, y1, x2, y2, conf, class.
            labels (torch.Tensor): Tensor of shape [M, 5] representing labels.
                Each label is of the format: class, x1, y1, x2, y2.

        Returns:
            (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels.
        """
        iou = batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -2:-1]], dim=-1))
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def _prepare_batch(self, si, batch):
        idx = batch['batch_idx'] == si
        cls = batch['cls'][idx].squeeze(-1)
        bbox = batch['bboxes'][idx]
        ori_shape = batch['ori_shape'][si]
        imgsz = batch['img'].shape[2:]
        ratio_pad = batch['ratio_pad'][si]
        if len(cls):
            bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
        prepared_batch = dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
        return prepared_batch

    def _prepare_pred(self, pred, pbatch):
        predn = pred.clone()
        ops.scale_boxes(pbatch['imgsz'], predn[:, :4], pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'],
                        xywh=True)  # native-space pred
        return predn

    def plot_predictions(self, batch, preds, ni):
        """Plots predicted bounding boxes on input images and saves the result."""
        plot_images(batch['img'],
                    *output_to_rotated_target(preds, max_det=self.args.max_det),
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_pred.jpg',
                    names=self.names,
                    on_plot=self.on_plot)  # pred

    def pred_to_json(self, predn, filename):
        """Serialize YOLO predictions to COCO json format."""
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        rbox = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
        poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
        for i, (r, b) in enumerate(zip(rbox.tolist(), poly.tolist())):
            self.jdict.append({
                'image_id': image_id,
                'category_id': self.class_map[int(predn[i, 5].item())],
                'score': round(predn[i, 4].item(), 5),
                'rbox': [round(x, 3) for x in r],
                'poly': [round(x, 3) for x in b]})

    def eval_json(self, stats):
        """Evaluates YOLO output in JSON format and returns performance statistics."""
        if self.args.save_json and self.is_dota and len(self.jdict):
            import json
            import re
            from collections import defaultdict
            pred_json = self.save_dir / 'predictions.json'  # predictions
            pred_txt = self.save_dir / 'predictions_txt'  # predictions
            pred_txt.mkdir(parents=True, exist_ok=True)
            data = json.load(open(pred_json))
            # Save split results
            LOGGER.info(f'Saving predictions with DOTA format to {str(pred_txt)}...')
            for d in data:
                image_id = d['image_id']
                score = d['score']
                classname = self.names[d['category_id']].replace(' ', '-')

                lines = '{} {} {} {} {} {} {} {} {} {}\n'.format(
                    image_id,
                    score,
                    d['poly'][0],
                    d['poly'][1],
                    d['poly'][2],
                    d['poly'][3],
                    d['poly'][4],
                    d['poly'][5],
                    d['poly'][6],
                    d['poly'][7],
                )
                with open(str(pred_txt / f'Task1_{classname}') + '.txt', 'a') as f:
                    f.writelines(lines)
            # Save merged results, this could result slightly lower map than using official merging script,
            # because of the probiou calculation.
            pred_merged_txt = self.save_dir / 'predictions_merged_txt'  # predictions
            pred_merged_txt.mkdir(parents=True, exist_ok=True)
            merged_results = defaultdict(list)
            LOGGER.info(f'Saving merged predictions with DOTA format to {str(pred_merged_txt)}...')
            for d in data:
                image_id = d['image_id'].split('__')[0]
                pattern = re.compile(r'\d+___\d+')
                x, y = (int(c) for c in re.findall(pattern, d['image_id'])[0].split('___'))
                bbox, score, cls = d['rbox'], d['score'], d['category_id']
                bbox[0] += x
                bbox[1] += y
                bbox.extend([score, cls])
                merged_results[image_id].append(bbox)
            for image_id, bbox in merged_results.items():
                bbox = torch.tensor(bbox)
                max_wh = torch.max(bbox[:, :2]).item() * 2
                c = bbox[:, 6:7] * max_wh  # classes
                scores = bbox[:, 5]  # scores
                b = bbox[:, :5].clone()
                b[:, :2] += c
                # 0.3 could get results close to the ones from official merging script, even slightly better.
                i = ops.nms_rotated(b, scores, 0.3)
                bbox = bbox[i]

                b = ops.xywhr2xyxyxyxy(bbox[:, :5]).view(-1, 8)
                for x in torch.cat([b, bbox[:, 5:7]], dim=-1).tolist():
                    classname = self.names[int(x[-1])].replace(' ', '-')
                    poly = [round(i, 3) for i in x[:-2]]
                    score = round(x[-2], 3)

                    lines = '{} {} {} {} {} {} {} {} {} {}\n'.format(
                        image_id,
                        score,
                        poly[0],
                        poly[1],
                        poly[2],
                        poly[3],
                        poly[4],
                        poly[5],
                        poly[6],
                        poly[7],
                    )
                    with open(str(pred_merged_txt / f'Task1_{classname}') + '.txt', 'a') as f:
                        f.writelines(lines)

        return stats
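`eval_json` reassembles patch-level DOTA predictions into full-image results: the `\d+___\d+` suffix of each `image_id` encodes the patch's top-left offset, which is added back onto the box centres, and duplicates from overlapping patches are removed with `nms_rotated`. The `b[:, :2] += c` step offsets every box by `class_index * max_wh` so a single class-agnostic rotated-NMS call behaves like per-class NMS, since boxes of different classes can no longer overlap. A small sketch of that offset trick, with the rotated-NMS routine passed in as an assumption rather than imported:

```python
import torch

def per_class_rotated_nms(rboxes, scores, classes, rotated_nms, thr=0.3):
    """Class-aware NMS via one class-agnostic call.

    rboxes: (n, 5) xywhr boxes; scores: (n,); classes: (n,) integer class ids;
    rotated_nms: any callable (boxes, scores, thr) -> kept indices.
    """
    max_wh = rboxes[:, :2].max().item() * 2          # larger than any image extent
    offset = classes.float().unsqueeze(1) * max_wh   # per-class coordinate shift
    shifted = rboxes.clone()
    shifted[:, :2] += offset                         # different classes can no longer overlap
    return rotated_nms(shifted, scores, thr)
```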
ultralytics/models/yolo/pose/val.py
@@ -66,57 +66,63 @@ class PoseValidator(DetectionValidator):
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]
        self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
        self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[])

    def _prepare_batch(self, si, batch):
        pbatch = super()._prepare_batch(si, batch)
        kpts = batch['keypoints'][batch['batch_idx'] == si]
        h, w = pbatch['imgsz']
        kpts = kpts.clone()
        kpts[..., 0] *= w
        kpts[..., 1] *= h
        kpts = ops.scale_coords(pbatch['imgsz'], kpts, pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'])
        pbatch['kpts'] = kpts
        return pbatch

    def _prepare_pred(self, pred, pbatch):
        predn = super()._prepare_pred(pred, pbatch)
        nk = pbatch['kpts'].shape[1]
        pred_kpts = predn[:, 6:].view(len(predn), nk, -1)
        ops.scale_coords(pbatch['imgsz'], pred_kpts, pbatch['ori_shape'], ratio_pad=pbatch['ratio_pad'])
        return predn, pred_kpts

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            kpts = batch['keypoints'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            nk = kpts.shape[1]  # number of keypoints
            shape = batch['ori_shape'][si]
            correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            npr = len(pred)
            stat = dict(conf=torch.zeros(0, device=self.device),
                        pred_cls=torch.zeros(0, device=self.device),
                        tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
                        tp_p=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
            nl = len(cls)
            stat['target_cls'] = cls
            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, correct_kpts, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            pred_kpts = predn[:, 6:].view(npr, nk, -1)
            ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si])
            predn, pred_kpts = self._prepare_pred(pred, pbatch)
            stat['conf'] = predn[:, 4]
            stat['pred_cls'] = predn[:, 5]

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                tkpts = kpts.clone()
                tkpts[..., 0] *= width
                tkpts[..., 1] *= height
                tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si])
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn[:, :6], labelsn)
                correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts)
                stat['tp'] = self._process_batch(predn, bbox, cls)
                stat['tp_p'] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch['kpts'])
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
                    self.confusion_matrix.process_batch(predn, bbox, cls)

            # Append correct_masks, correct_boxes, pconf, pcls, tcls
            self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
            for k in self.stats.keys():
                self.stats[k].append(stat[k])

            # Save
            if self.args.save_json:
@@ -124,7 +130,7 @@ class PoseValidator(DetectionValidator):
            # if self.args.save_txt:
            # save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')

    def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None):
    def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=None):
        """
        Return correct prediction matrix.

@@ -142,12 +148,12 @@ class PoseValidator(DetectionValidator):
        """
        if pred_kpts is not None and gt_kpts is not None:
            # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
            area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53
            area = ops.xyxy2xywh(gt_bboxes)[:, 2:].prod(1) * 0.53
            iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
        else:  # boxes
            iou = box_iou(labels[:, 1:], detections[:, :4])
            iou = box_iou(gt_bboxes, detections[:, :4])

        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def plot_val_samples(self, batch, ni):
        """Plots and saves validation set samples with predicted bounding boxes and keypoints."""
ultralytics/models/yolo/segment/val.py
@@ -51,6 +51,7 @@ class SegmentationValidator(DetectionValidator):
            self.process = ops.process_mask_upsample  # more accurate
        else:
            self.process = ops.process_mask  # faster
        self.stats = dict(tp_m=[], tp=[], conf=[], pred_cls=[], target_cls=[])

    def get_desc(self):
        """Return a formatted description of evaluation metrics."""
@@ -70,59 +71,62 @@ class SegmentationValidator(DetectionValidator):
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        return p, proto

    def _prepare_batch(self, si, batch):
        prepared_batch = super()._prepare_batch(si, batch)
        midx = [si] if self.args.overlap_mask else batch['batch_idx'] == si
        prepared_batch['masks'] = batch['masks'][midx]
        return prepared_batch

    def _prepare_pred(self, pred, pbatch, proto):
        predn = super()._prepare_pred(pred, pbatch)
        pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=pbatch['imgsz'])
        return predn, pred_masks

    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch['ori_shape'][si]
            correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            npr = len(pred)
            stat = dict(conf=torch.zeros(0, device=self.device),
                        pred_cls=torch.zeros(0, device=self.device),
                        tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
                        tp_m=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device))
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop('cls'), pbatch.pop('bbox')
            nl = len(cls)
            stat['target_cls'] = cls
            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue

            # Masks
            midx = [si] if self.args.overlap_mask else idx
            gt_masks = batch['masks'][midx]
            pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])

            gt_masks = pbatch.pop('masks')
            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            predn, pred_masks = self._prepare_pred(pred, pbatch, proto)
            stat['conf'] = predn[:, 4]
            stat['pred_cls'] = predn[:, 5]

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                correct_masks = self._process_batch(predn,
                                                    labelsn,
                                                    pred_masks,
                                                    gt_masks,
                                                    overlap=self.args.overlap_mask,
                                                    masks=True)
                stat['tp'] = self._process_batch(predn, bbox, cls)
                stat['tp_m'] = self._process_batch(predn,
                                                   bbox,
                                                   cls,
                                                   pred_masks,
                                                   gt_masks,
                                                   self.args.overlap_mask,
                                                   masks=True)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
                    self.confusion_matrix.process_batch(predn, bbox, cls)

            # Append correct_masks, correct_boxes, pconf, pcls, tcls
            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
            for k in self.stats.keys():
                self.stats[k].append(stat[k])

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if self.args.plots and self.batch_i < 3:
@@ -131,7 +135,7 @@ class SegmentationValidator(DetectionValidator):
            # Save
            if self.args.save_json:
                pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
                                             shape,
                                             pbatch['ori_shape'],
                                             ratio_pad=batch['ratio_pad'][si])
                self.pred_to_json(predn, batch['im_file'][si], pred_masks)
            # if self.args.save_txt:
@@ -142,7 +146,7 @@ class SegmentationValidator(DetectionValidator):
        self.metrics.speed = self.speed
        self.metrics.confusion_matrix = self.confusion_matrix

    def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    def _process_batch(self, detections, gt_bboxes, gt_cls, pred_masks=None, gt_masks=None, overlap=False, masks=False):
        """
        Return correct prediction matrix.

@@ -155,7 +159,7 @@ class SegmentationValidator(DetectionValidator):
        """
        if masks:
            if overlap:
                nl = len(labels)
                nl = len(gt_cls)
                index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
                gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
                gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
@@ -164,9 +168,9 @@ class SegmentationValidator(DetectionValidator):
                gt_masks = gt_masks.gt_(0.5)
            iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
        else:  # boxes
            iou = box_iou(labels[:, 1:], detections[:, :4])
            iou = box_iou(gt_bboxes, detections[:, :4])

        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def plot_val_samples(self, batch, ni):
        """Plots validation samples with bounding box labels."""
@@ -174,7 +178,7 @@ class SegmentationValidator(DetectionValidator):
                    batch['batch_idx'],
                    batch['cls'].squeeze(-1),
                    batch['bboxes'],
                    batch['masks'],
                    masks=batch['masks'],
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
                    names=self.names,