YOLOv8 architecture updates from R&D branch (#88)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
5fbea25f0b
commit
ebd3cfb2fd
23 changed files with 720 additions and 570 deletions
|
|
@@ -19,10 +19,10 @@ from torch.cuda import amp
|
|||
|
||||
from ultralytics.yolo.data.augment import LetterBox
|
||||
from ultralytics.yolo.utils import LOGGER, colorstr
|
||||
from ultralytics.yolo.utils.checks import check_version
|
||||
from ultralytics.yolo.utils.files import increment_path
|
||||
from ultralytics.yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh
|
||||
from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
|
||||
from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
|
||||
from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode
|
||||
|
||||
from .autobackend import AutoBackend
|
||||
|
|
@@ -605,62 +605,55 @@ class Ensemble(nn.ModuleList):
|
|||
# heads
|
||||
class Detect(nn.Module):
|
||||
# YOLOv5 Detect head for detection models
|
||||
stride = None # strides computed during build
|
||||
dynamic = False # force grid reconstruction
|
||||
export = False # export mode
|
||||
shape = None
|
||||
anchors = torch.empty(0) # init
|
||||
strides = torch.empty(0) # init
|
||||
|
||||
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
|
||||
def __init__(self, nc=80, ch=()): # detection layer
|
||||
super().__init__()
|
||||
self.nc = nc # number of classes
|
||||
self.no = nc + 5 # number of outputs per anchor
|
||||
self.nl = len(anchors) # number of detection layers
|
||||
self.na = len(anchors[0]) // 2 # number of anchors
|
||||
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
|
||||
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
|
||||
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
|
||||
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
|
||||
self.inplace = inplace # use inplace ops (e.g. slice assignment)
|
||||
self.nl = len(ch) # number of detection layers
|
||||
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
|
||||
self.no = nc + self.reg_max * 4 # number of outputs per anchor
|
||||
self.stride = torch.zeros(self.nl) # strides computed during build
|
||||
|
||||
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
|
||||
self.cv2 = nn.ModuleList(
|
||||
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
|
||||
self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
|
||||
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
|
||||
|
||||
def forward(self, x):
|
||||
z = [] # inference output
|
||||
shape = x[0].shape # BCHW
|
||||
for i in range(self.nl):
|
||||
x[i] = self.m[i](x[i]) # conv
|
||||
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
|
||||
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
|
||||
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
|
||||
box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
|
||||
if self.training:
|
||||
return x, box, cls
|
||||
elif self.dynamic or self.shape != shape:
|
||||
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
|
||||
self.shape = shape
|
||||
|
||||
if not self.training: # inference
|
||||
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
|
||||
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
|
||||
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
|
||||
y = torch.cat((dbox, cls.sigmoid()), 1)
|
||||
return y if self.export else (y, (x, box, cls))
|
||||
|
||||
if isinstance(self, Segment): # (boxes + masks)
|
||||
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
|
||||
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
|
||||
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
|
||||
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
|
||||
else: # Detect (boxes only)
|
||||
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
|
||||
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
|
||||
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
|
||||
y = torch.cat((xy, wh, conf), 4)
|
||||
z.append(y.view(bs, self.na * nx * ny, self.no))
|
||||
|
||||
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
|
||||
|
||||
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
|
||||
d = self.anchors[i].device
|
||||
t = self.anchors[i].dtype
|
||||
shape = 1, self.na, ny, nx, 2 # grid shape
|
||||
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
|
||||
yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility
|
||||
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
|
||||
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
|
||||
return grid, anchor_grid
|
||||
def bias_init(self):
|
||||
# Initialize Detect() biases, WARNING: requires stride availability
|
||||
m = self # self.model[-1] # Detect() module
|
||||
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
|
||||
# ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency
|
||||
for a, b, s in zip(m.cv2, m.cv3, m.stride): # from
|
||||
a[-1].bias.data[:] = 1.0 # box
|
||||
b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
|
||||
|
||||
|
||||
class Segment(Detect):
|
||||
# YOLOv5 Segment head for segmentation models
|
||||
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
|
||||
super().__init__(nc, anchors, ch, inplace)
|
||||
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=()):
|
||||
super().__init__(nc, anchors, ch)
|
||||
self.nm = nm # number of masks
|
||||
self.npr = npr # number of protos
|
||||
self.no = 5 + nc + self.nm # number of outputs per anchor
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue