ultralytics 8.1.23 add YOLOv9-C and E models (#8571)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Laughing 2024-03-04 21:27:23 +08:00 committed by GitHub
parent e138d701a0
commit 2071776a36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 360 additions and 10 deletions

View file

@@ -43,6 +43,12 @@ from ultralytics.nn.modules import (
RTDETRDecoder,
Segment,
WorldDetect,
RepNCSPELAN4,
ADown,
SPPELAN,
CBFuse,
CBLinear,
Silence,
)
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
@@ -570,7 +576,7 @@ class WorldModel(DetectionModel):
text_token = clip.tokenize(text).to(device)
txt_feats = model.encode_text(text_token).to(dtype=torch.float32)
txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
-self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
+self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1]).detach()
self.model[-1].nc = len(text)
def init_criterion(self):
@@ -850,6 +856,9 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
C1,
C2,
C2f,
RepNCSPELAN4,
ADown,
SPPELAN,
C2fAttn,
C3,
C3TR,
@@ -892,6 +901,12 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1
args.insert(1, [ch[x] for x in f])
elif m is CBLinear:
c2 = args[0]
c1 = ch[f]
args = [c1, c2, *args[1:]]
elif m is CBFuse:
c2 = ch[f[-1]]
else:
c2 = ch[f]