ultralytics 8.0.199 *.npy image loading exception handling (#5683)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: snyk-bot <snyk-bot@snyk.io> Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
5b3c4cfc0e
commit
cedce60f8c
16 changed files with 479 additions and 280 deletions
|
|
@ -1,5 +1,12 @@
|
|||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
"""RT-DETR model interface."""
|
||||
"""
|
||||
Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time
|
||||
performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient
|
||||
hybrid encoder and IoU-aware query selection for enhanced detection accuracy.
|
||||
|
||||
For more information on RT-DETR, visit: https://arxiv.org/pdf/2304.08069.pdf
|
||||
"""
|
||||
|
||||
from ultralytics.engine.model import Model
|
||||
from ultralytics.nn.tasks import RTDETRDetectionModel
|
||||
|
||||
|
|
@ -9,17 +16,36 @@ from .val import RTDETRValidator
|
|||
|
||||
|
||||
class RTDETR(Model):
|
||||
"""RTDETR model interface."""
|
||||
"""
|
||||
Interface for Baidu's RT-DETR model. This Vision Transformer-based object detector provides real-time performance
|
||||
with high accuracy. It supports efficient hybrid encoding, IoU-aware query selection, and adaptable inference speed.
|
||||
|
||||
Attributes:
|
||||
model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'.
|
||||
"""
|
||||
|
||||
def __init__(self, model='rtdetr-l.pt') -> None:
|
||||
"""Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
|
||||
"""
|
||||
Initializes the RT-DETR model with the given pre-trained model file. Supports .pt and .yaml formats.
|
||||
|
||||
Args:
|
||||
model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If the model file extension is not 'pt', 'yaml', or 'yml'.
|
||||
"""
|
||||
if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
|
||||
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
|
||||
raise NotImplementedError('RT-DETR only supports creating from *.pt, *.yaml, or *.yml files.')
|
||||
super().__init__(model=model, task='detect')
|
||||
|
||||
@property
|
||||
def task_map(self):
|
||||
"""Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
|
||||
def task_map(self) -> dict:
|
||||
"""
|
||||
Returns a task map for RT-DETR, associating tasks with corresponding Ultralytics classes.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary mapping task names to Ultralytics task classes for the RT-DETR model.
|
||||
"""
|
||||
return {
|
||||
'detect': {
|
||||
'predictor': RTDETRPredictor,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,11 @@ from ultralytics.utils import ops
|
|||
|
||||
class RTDETRPredictor(BasePredictor):
|
||||
"""
|
||||
A class extending the BasePredictor class for prediction based on an RT-DETR detection model.
|
||||
RT-DETR (Real-Time Detection Transformer) Predictor extending the BasePredictor class for making predictions using
|
||||
Baidu's RT-DETR model.
|
||||
|
||||
This class leverages the power of Vision Transformers to provide real-time object detection while maintaining
|
||||
high accuracy. It supports key features like efficient hybrid encoding and IoU-aware query selection.
|
||||
|
||||
Example:
|
||||
```python
|
||||
|
|
@ -21,10 +25,27 @@ class RTDETRPredictor(BasePredictor):
|
|||
predictor = RTDETRPredictor(overrides=args)
|
||||
predictor.predict_cli()
|
||||
```
|
||||
|
||||
Attributes:
|
||||
imgsz (int): Image size for inference (must be square and scale-filled).
|
||||
args (dict): Argument overrides for the predictor.
|
||||
"""
|
||||
|
||||
def postprocess(self, preds, img, orig_imgs):
|
||||
"""Postprocess predictions and returns a list of Results objects."""
|
||||
"""
|
||||
Postprocess the raw predictions from the model to generate bounding boxes and confidence scores.
|
||||
|
||||
The method filters detections based on confidence and class if specified in `self.args`.
|
||||
|
||||
Args:
|
||||
preds (torch.Tensor): Raw predictions from the model.
|
||||
img (torch.Tensor): Processed input images.
|
||||
orig_imgs (list or torch.Tensor): Original, unprocessed images.
|
||||
|
||||
Returns:
|
||||
(list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores,
|
||||
and class labels.
|
||||
"""
|
||||
nd = preds[0].shape[-1]
|
||||
bboxes, scores = preds[0].split((4, nd - 4), dim=-1)
|
||||
|
||||
|
|
@ -49,15 +70,14 @@ class RTDETRPredictor(BasePredictor):
|
|||
|
||||
def pre_transform(self, im):
|
||||
"""
|
||||
Pre-transform input image before inference.
|
||||
Pre-transforms the input images before feeding them into the model for inference. The input images are
|
||||
letterboxed to ensure a square aspect ratio and scale-filled. The size must be square(640) and scaleFilled.
|
||||
|
||||
Args:
|
||||
im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
|
||||
|
||||
Notes: The size must be square(640) and scaleFilled.
|
||||
im (list[np.ndarray] | torch.Tensor): Input images of shape (N,3,h,w) for tensor, [(h,w,3) x N] for list.
|
||||
|
||||
Returns:
|
||||
(list): A list of transformed imgs.
|
||||
(list): List of pre-transformed images ready for model inference.
|
||||
"""
|
||||
letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True)
|
||||
return [letterbox(image=x) for x in im]
|
||||
|
|
|
|||
|
|
@ -13,10 +13,12 @@ from .val import RTDETRDataset, RTDETRValidator
|
|||
|
||||
class RTDETRTrainer(DetectionTrainer):
|
||||
"""
|
||||
A class extending the DetectionTrainer class for training based on an RT-DETR detection model.
|
||||
Trainer class for the RT-DETR model developed by Baidu for real-time object detection. Extends the DetectionTrainer
|
||||
class for YOLO to adapt to the specific features and architecture of RT-DETR. This model leverages Vision
|
||||
Transformers and has capabilities like IoU-aware query selection and adaptable inference speed.
|
||||
|
||||
Notes:
|
||||
- F.grid_sample used in rt-detr does not support the `deterministic=True` argument.
|
||||
- F.grid_sample used in RT-DETR does not support the `deterministic=True` argument.
|
||||
- AMP training can lead to NaN outputs and may produce errors during bipartite graph matching.
|
||||
|
||||
Example:
|
||||
|
|
@ -30,7 +32,17 @@ class RTDETRTrainer(DetectionTrainer):
|
|||
"""
|
||||
|
||||
def get_model(self, cfg=None, weights=None, verbose=True):
|
||||
"""Return a YOLO detection model."""
|
||||
"""
|
||||
Initialize and return an RT-DETR model for object detection tasks.
|
||||
|
||||
Args:
|
||||
cfg (dict, optional): Model configuration. Defaults to None.
|
||||
weights (str, optional): Path to pre-trained model weights. Defaults to None.
|
||||
verbose (bool): Verbose logging if True. Defaults to True.
|
||||
|
||||
Returns:
|
||||
(RTDETRDetectionModel): Initialized model.
|
||||
"""
|
||||
model = RTDETRDetectionModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
|
||||
if weights:
|
||||
model.load(weights)
|
||||
|
|
@ -38,31 +50,46 @@ class RTDETRTrainer(DetectionTrainer):
|
|||
|
||||
def build_dataset(self, img_path, mode='val', batch=None):
|
||||
"""
|
||||
Build RTDETR Dataset.
|
||||
Build and return an RT-DETR dataset for training or validation.
|
||||
|
||||
Args:
|
||||
img_path (str): Path to the folder containing images.
|
||||
mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
|
||||
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
|
||||
mode (str): Dataset mode, either 'train' or 'val'.
|
||||
batch (int, optional): Batch size for rectangle training. Defaults to None.
|
||||
|
||||
Returns:
|
||||
(RTDETRDataset): Dataset object for the specific mode.
|
||||
"""
|
||||
return RTDETRDataset(
|
||||
img_path=img_path,
|
||||
imgsz=self.args.imgsz,
|
||||
batch_size=batch,
|
||||
augment=mode == 'train', # no augmentation
|
||||
hyp=self.args,
|
||||
rect=False, # no rect
|
||||
cache=self.args.cache or None,
|
||||
prefix=colorstr(f'{mode}: '),
|
||||
data=self.data)
|
||||
return RTDETRDataset(img_path=img_path,
|
||||
imgsz=self.args.imgsz,
|
||||
batch_size=batch,
|
||||
augment=mode == 'train',
|
||||
hyp=self.args,
|
||||
rect=False,
|
||||
cache=self.args.cache or None,
|
||||
prefix=colorstr(f'{mode}: '),
|
||||
data=self.data)
|
||||
|
||||
def get_validator(self):
|
||||
"""Returns a DetectionValidator for RTDETR model validation."""
|
||||
"""
|
||||
Returns a DetectionValidator suitable for RT-DETR model validation.
|
||||
|
||||
Returns:
|
||||
(RTDETRValidator): Validator object for model validation.
|
||||
"""
|
||||
self.loss_names = 'giou_loss', 'cls_loss', 'l1_loss'
|
||||
return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
|
||||
|
||||
def preprocess_batch(self, batch):
|
||||
"""Preprocesses a batch of images by scaling and converting to float."""
|
||||
"""
|
||||
Preprocess a batch of images. Scales and converts the images to float format.
|
||||
|
||||
Args:
|
||||
batch (dict): Dictionary containing a batch of images, bboxes, and labels.
|
||||
|
||||
Returns:
|
||||
(dict): Preprocessed batch.
|
||||
"""
|
||||
batch = super().preprocess_batch(batch)
|
||||
bs = len(batch['img'])
|
||||
batch_idx = batch['batch_idx']
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue