ultralytics 8.0.197 save P, R, F1 curves to metrics (#5354)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com>
Glenn Jocher committed 2023-10-13 02:49:31 +02:00 via GitHub
parent 7fd5dcbd86
commit 12e3eef844
33 changed files with 337 additions and 195 deletions


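The commit title mentions saving P, R and F1 curves to metrics. As a rough, hypothetical illustration of what such curves are (a sketch under assumed inputs, not the ultralytics implementation), the snippet below evaluates precision, recall and F1 over a sweep of confidence thresholds:

```python
# Hypothetical sketch: precision, recall and F1 evaluated over a confidence sweep.
# Function name, argument shapes and thresholds are illustrative assumptions,
# not the ultralytics API.
import numpy as np

def pr_f1_curves(conf, tp, n_gt, thresholds=np.linspace(0, 1, 101)):
    """Return precision, recall and F1 curves over confidence thresholds.

    conf: (N,) predicted confidences
    tp:   (N,) bool, whether each prediction matched a ground-truth box
    n_gt: total number of ground-truth boxes
    """
    conf = np.asarray(conf, dtype=float)
    tp = np.asarray(tp, dtype=bool)
    p, r = [], []
    for t in thresholds:
        keep = conf >= t           # predictions kept at this threshold
        tps = tp[keep].sum()       # true positives among kept predictions
        fps = keep.sum() - tps     # false positives among kept predictions
        p.append(tps / (tps + fps + 1e-16))
        r.append(tps / (n_gt + 1e-16))
    p, r = np.array(p), np.array(r)
    f1 = 2 * p * r / (p + r + 1e-16)
    return thresholds, p, r, f1

# Example: 5 predictions, 4 ground-truth boxes
x, p, r, f1 = pr_f1_curves(conf=[0.9, 0.8, 0.7, 0.4, 0.2],
                           tp=[True, True, False, True, False],
                           n_gt=4)
print(f1.max(), x[f1.argmax()])  # best F1 and the confidence that achieves it
```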
@@ -192,7 +192,7 @@ class RTDETRDecoder(nn.Module):
dropout=0.,
act=nn.ReLU(),
eval_idx=-1,
-# training args
+# Training args
nd=100, # num denoising
label_noise_ratio=0.5,
box_noise_scale=1.0,
@@ -225,7 +225,7 @@ class RTDETRDecoder(nn.Module):
self.num_queries = nq
self.num_decoder_layers = ndl
-# backbone feature projection
+# Backbone feature projection
self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
# NOTE: simplified version but it's not consistent with .pt weights.
# self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)
@@ -234,24 +234,24 @@ class RTDETRDecoder(nn.Module):
decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)
-# denoising part
+# Denoising part
self.denoising_class_embed = nn.Embedding(nc, hd)
self.num_denoising = nd
self.label_noise_ratio = label_noise_ratio
self.box_noise_scale = box_noise_scale
-# decoder embedding
+# Decoder embedding
self.learnt_init_query = learnt_init_query
if learnt_init_query:
self.tgt_embed = nn.Embedding(nq, hd)
self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)
-# encoder head
+# Encoder head
self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
self.enc_score_head = nn.Linear(hd, nc)
self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)
-# decoder head
+# Decoder head
self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])
@@ -261,10 +261,10 @@ class RTDETRDecoder(nn.Module):
"""Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
-# input projection and embedding
+# Input projection and embedding
feats, shapes = self._get_encoder_input(x)
-# prepare denoising training
+# Prepare denoising training
dn_embed, dn_bbox, attn_mask, dn_meta = \
get_cdn_group(batch,
self.nc,
@@ -278,7 +278,7 @@ class RTDETRDecoder(nn.Module):
embed, refer_bbox, enc_bboxes, enc_scores = \
self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)
-# decoder
+# Decoder
dec_bboxes, dec_scores = self.decoder(embed,
refer_bbox,
feats,
@@ -316,9 +316,9 @@ class RTDETRDecoder(nn.Module):
def _get_encoder_input(self, x):
"""Processes and returns encoder inputs by getting projection features from input and concatenating them."""
-# get projection features
+# Get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
-# get encoder inputs
+# Get encoder inputs
feats = []
shapes = []
for feat in x:
@@ -335,13 +335,13 @@ class RTDETRDecoder(nn.Module):
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
"""Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
-# prepare input for decoder
+# Prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
features = self.enc_output(valid_mask * feats) # bs, h*w, 256
enc_outputs_scores = self.enc_score_head(features) # (bs, h*w, nc)
-# query selection
+# Query selection
# (bs, num_queries)
topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
# (bs, num_queries)
@@ -352,7 +352,7 @@ class RTDETRDecoder(nn.Module):
# (bs, num_queries, 4)
top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
-# dynamic anchors + static content
+# Dynamic anchors + static content
refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
enc_bboxes = refer_bbox.sigmoid()
@@ -373,7 +373,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
"""Initializes or resets the parameters of the model's various components with predefined weights and biases."""
-# class and bbox head init
+# Class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
# linear_init_(self.enc_score_head)
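
For context on the `_get_decoder_input` hunk above, here is a minimal standalone sketch of its top-k query-selection step. Tensor names and shapes are illustrative assumptions, and the gather is written with plain advanced indexing rather than the exact view/flatten used in the source:

```python
# Standalone sketch of top-k query selection: score every encoder token, keep
# the num_queries highest-scoring ones, and gather the matching features and
# anchors for the decoder. Shapes below are illustrative only.
import torch

bs, hw, hd, nc, num_queries = 2, 8400, 256, 80, 300

features = torch.randn(bs, hw, hd)    # encoder output features
anchors = torch.rand(bs, hw, 4)       # per-token anchor boxes
enc_scores = torch.randn(bs, hw, nc)  # per-token class logits

# (bs, num_queries) indices of the best-scoring tokens per image
topk_ind = torch.topk(enc_scores.max(-1).values, num_queries, dim=1).indices

# Gather the selected features and anchors for the decoder queries
batch_ind = torch.arange(bs).unsqueeze(-1)       # (bs, 1), broadcasts over queries
top_k_features = features[batch_ind, topk_ind]   # (bs, num_queries, hd)
top_k_anchors = anchors[batch_ind, topk_ind]     # (bs, num_queries, 4)

print(top_k_features.shape, top_k_anchors.shape)
```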