Code Refactor for Speed and Readability (#13450)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
1b26838def
commit
6367ff4748
3 changed files with 35 additions and 28 deletions
|
|
@ -86,7 +86,7 @@ def load_yolo_dota(data_root, split="train"):
|
|||
return annos
|
||||
|
||||
|
||||
def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
|
||||
def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0.01):
|
||||
"""
|
||||
Get the coordinates of windows.
|
||||
|
||||
|
|
@ -95,6 +95,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
|
|||
crop_sizes (List(int)): Crop size of windows.
|
||||
gaps (List(int)): Gap between crops.
|
||||
im_rate_thr (float): Threshold of windows areas divided by image areas.
|
||||
eps (float): Epsilon value for math operations.
|
||||
"""
|
||||
h, w = im_size
|
||||
windows = []
|
||||
|
|
@ -187,7 +188,7 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
|
|||
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
|
||||
|
||||
|
||||
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
|
||||
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
|
||||
"""
|
||||
Split both images and labels.
|
||||
|
||||
|
|
@ -217,7 +218,7 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024
|
|||
crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))
|
||||
|
||||
|
||||
def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
||||
def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
|
||||
"""
|
||||
Split train and val set of DOTA.
|
||||
|
||||
|
|
@ -247,7 +248,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
|||
split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
|
||||
|
||||
|
||||
def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
|
||||
def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
|
||||
"""
|
||||
Split test set of DOTA, labels are not included within this set.
|
||||
|
||||
|
|
|
|||
|
|
@ -169,12 +169,18 @@ class BasePredictor:
|
|||
|
||||
def predict_cli(self, source=None, model=None):
|
||||
"""
|
||||
Method used for CLI prediction.
|
||||
Method used for Command Line Interface (CLI) prediction.
|
||||
|
||||
It uses always generator as outputs as not required by CLI mode.
|
||||
This function is designed to run predictions using the CLI. It sets up the source and model, then processes
|
||||
the inputs in a streaming manner. This method ensures that no outputs accumulate in memory by consuming the
|
||||
generator without storing results.
|
||||
|
||||
Note:
|
||||
Do not modify this function or remove the generator. The generator ensures that no outputs are
|
||||
accumulated in memory, which is critical for preventing memory issues during long-running predictions.
|
||||
"""
|
||||
gen = self.stream_inference(source, model)
|
||||
for _ in gen: # noqa, running CLI inference without accumulating any outputs (do not modify)
|
||||
for _ in gen: # sourcery skip: remove-empty-nested-block, noqa
|
||||
pass
|
||||
|
||||
def setup_source(self, source):
|
||||
|
|
|
|||
|
|
@ -383,44 +383,44 @@ class TinyViTBlock(nn.Module):
|
|||
"""Applies attention-based transformation or padding to input 'x' before passing it through a local
|
||||
convolution.
|
||||
"""
|
||||
H, W = self.input_resolution
|
||||
B, L, C = x.shape
|
||||
assert L == H * W, "input feature has wrong size"
|
||||
h, w = self.input_resolution
|
||||
b, l, c = x.shape
|
||||
assert l == h * w, "input feature has wrong size"
|
||||
res_x = x
|
||||
if H == self.window_size and W == self.window_size:
|
||||
if h == self.window_size and w == self.window_size:
|
||||
x = self.attn(x)
|
||||
else:
|
||||
x = x.view(B, H, W, C)
|
||||
pad_b = (self.window_size - H % self.window_size) % self.window_size
|
||||
pad_r = (self.window_size - W % self.window_size) % self.window_size
|
||||
x = x.view(b, h, w, c)
|
||||
pad_b = (self.window_size - h % self.window_size) % self.window_size
|
||||
pad_r = (self.window_size - w % self.window_size) % self.window_size
|
||||
padding = pad_b > 0 or pad_r > 0
|
||||
|
||||
if padding:
|
||||
x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))
|
||||
|
||||
pH, pW = H + pad_b, W + pad_r
|
||||
pH, pW = h + pad_b, w + pad_r
|
||||
nH = pH // self.window_size
|
||||
nW = pW // self.window_size
|
||||
# Window partition
|
||||
x = (
|
||||
x.view(B, nH, self.window_size, nW, self.window_size, C)
|
||||
x.view(b, nH, self.window_size, nW, self.window_size, c)
|
||||
.transpose(2, 3)
|
||||
.reshape(B * nH * nW, self.window_size * self.window_size, C)
|
||||
.reshape(b * nH * nW, self.window_size * self.window_size, c)
|
||||
)
|
||||
x = self.attn(x)
|
||||
# Window reverse
|
||||
x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)
|
||||
x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c)
|
||||
|
||||
if padding:
|
||||
x = x[:, :H, :W].contiguous()
|
||||
x = x[:, :h, :w].contiguous()
|
||||
|
||||
x = x.view(B, L, C)
|
||||
x = x.view(b, l, c)
|
||||
|
||||
x = res_x + self.drop_path(x)
|
||||
|
||||
x = x.transpose(1, 2).reshape(B, C, H, W)
|
||||
x = x.transpose(1, 2).reshape(b, c, h, w)
|
||||
x = self.local_conv(x)
|
||||
x = x.view(B, C, L).transpose(1, 2)
|
||||
x = x.view(b, c, l).transpose(1, 2)
|
||||
|
||||
return x + self.drop_path(self.mlp(x))
|
||||
|
||||
|
|
@ -565,10 +565,10 @@ class TinyViT(nn.Module):
|
|||
img_size=224,
|
||||
in_chans=3,
|
||||
num_classes=1000,
|
||||
embed_dims=[96, 192, 384, 768],
|
||||
depths=[2, 2, 6, 2],
|
||||
num_heads=[3, 6, 12, 24],
|
||||
window_sizes=[7, 7, 14, 7],
|
||||
embed_dims=(96, 192, 384, 768),
|
||||
depths=(2, 2, 6, 2),
|
||||
num_heads=(3, 6, 12, 24),
|
||||
window_sizes=(7, 7, 14, 7),
|
||||
mlp_ratio=4.0,
|
||||
drop_rate=0.0,
|
||||
drop_path_rate=0.1,
|
||||
|
|
@ -732,8 +732,8 @@ class TinyViT(nn.Module):
|
|||
for i in range(start_i, len(self.layers)):
|
||||
layer = self.layers[i]
|
||||
x = layer(x)
|
||||
B, _, C = x.shape
|
||||
x = x.view(B, 64, 64, C)
|
||||
batch, _, channel = x.shape
|
||||
x = x.view(batch, 64, 64, channel)
|
||||
x = x.permute(0, 3, 1, 2)
|
||||
return self.neck(x)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue