ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>

parent e795277391
commit fe27db2f6e

139 changed files with 6870 additions and 5125 deletions
@@ -1,9 +1,29 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-from .tasks import (BaseModel, ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight,
-                    attempt_load_weights, guess_model_scale, guess_model_task, parse_model, torch_safe_load,
-                    yaml_model_load)
+from .tasks import (
+    BaseModel,
+    ClassificationModel,
+    DetectionModel,
+    SegmentationModel,
+    attempt_load_one_weight,
+    attempt_load_weights,
+    guess_model_scale,
+    guess_model_task,
+    parse_model,
+    torch_safe_load,
+    yaml_model_load,
+)
 
-__all__ = ('attempt_load_one_weight', 'attempt_load_weights', 'parse_model', 'yaml_model_load', 'guess_model_task',
-           'guess_model_scale', 'torch_safe_load', 'DetectionModel', 'SegmentationModel', 'ClassificationModel',
-           'BaseModel')
+__all__ = (
+    "attempt_load_one_weight",
+    "attempt_load_weights",
+    "parse_model",
+    "yaml_model_load",
+    "guess_model_task",
+    "guess_model_scale",
+    "torch_safe_load",
+    "DetectionModel",
+    "SegmentationModel",
+    "ClassificationModel",
+    "BaseModel",
+)
@@ -32,10 +32,12 @@ def check_class_names(names):
         names = {int(k): str(v) for k, v in names.items()}
         n = len(names)
         if max(names.keys()) >= n:
-            raise KeyError(f'{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices '
-                           f'{min(names.keys())}-{max(names.keys())} defined in your dataset YAML.')
-        if isinstance(names[0], str) and names[0].startswith('n0'):  # imagenet class codes, i.e. 'n01440764'
-            names_map = yaml_load(ROOT / 'cfg/datasets/ImageNet.yaml')['map']  # human-readable names
+            raise KeyError(
+                f"{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices "
+                f"{min(names.keys())}-{max(names.keys())} defined in your dataset YAML."
+            )
+        if isinstance(names[0], str) and names[0].startswith("n0"):  # imagenet class codes, i.e. 'n01440764'
+            names_map = yaml_load(ROOT / "cfg/datasets/ImageNet.yaml")["map"]  # human-readable names
             names = {k: names_map[v] for k, v in names.items()}
     return names
 
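
Note: a minimal sketch of the contract `check_class_names` enforces, with hypothetical `names` inputs (not part of this diff):

```python
# The two accepted `names` forms, and the failure mode guarded by the KeyError above
names_list = ["person", "bicycle", "car"]           # list form -> dict(enumerate(...))
names_dict = {0: "person", 1: "bicycle", 2: "car"}  # dict form, keys must be 0..n-1

bad = {0: "person", 80: "car"}  # n=2 but max key is 80 -> check_class_names raises KeyError
n = len(bad)
assert max(bad.keys()) >= n  # the exact condition that triggers the error
```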
@@ -44,8 +46,8 @@ def default_class_names(data=None):
     """Applies default class names to an input YAML file or returns numerical class names."""
     if data:
         with contextlib.suppress(Exception):
-            return yaml_load(check_yaml(data))['names']
-    return {i: f'class{i}' for i in range(999)}  # return default if above errors
+            return yaml_load(check_yaml(data))["names"]
+    return {i: f"class{i}" for i in range(999)}  # return default if above errors
 
 
 class AutoBackend(nn.Module):
@@ -77,14 +79,16 @@ class AutoBackend(nn.Module):
     """
 
     @torch.no_grad()
-    def __init__(self,
-                 weights='yolov8n.pt',
-                 device=torch.device('cpu'),
-                 dnn=False,
-                 data=None,
-                 fp16=False,
-                 fuse=True,
-                 verbose=True):
+    def __init__(
+        self,
+        weights="yolov8n.pt",
+        device=torch.device("cpu"),
+        dnn=False,
+        data=None,
+        fp16=False,
+        fuse=True,
+        verbose=True,
+    ):
         """
         Initialize the AutoBackend for inference.
 
@@ -100,17 +104,31 @@ class AutoBackend(nn.Module):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         nn_module = isinstance(weights, torch.nn.Module)
-        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, triton = \
-            self._model_type(w)
+        (
+            pt,
+            jit,
+            onnx,
+            xml,
+            engine,
+            coreml,
+            saved_model,
+            pb,
+            tflite,
+            edgetpu,
+            tfjs,
+            paddle,
+            ncnn,
+            triton,
+        ) = self._model_type(w)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
         stride = 32  # default stride
         model, metadata = None, None
 
         # Set device
-        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        cuda = torch.cuda.is_available() and device.type != "cpu"  # use CUDA
         if cuda and not any([nn_module, pt, jit, engine, onnx]):  # GPU dataloader formats
-            device = torch.device('cpu')
+            device = torch.device("cpu")
             cuda = False
 
         # Download if not local
@@ -121,77 +139,79 @@ class AutoBackend(nn.Module):
         if nn_module:  # in-memory PyTorch model
             model = weights.to(device)
             model = model.fuse(verbose=verbose) if fuse else model
-            if hasattr(model, 'kpt_shape'):
+            if hasattr(model, "kpt_shape"):
                 kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            names = model.module.names if hasattr(model, "module") else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
             pt = True
         elif pt:  # PyTorch
             from ultralytics.nn.tasks import attempt_load_weights
-            model = attempt_load_weights(weights if isinstance(weights, list) else w,
-                                         device=device,
-                                         inplace=True,
-                                         fuse=fuse)
-            if hasattr(model, 'kpt_shape'):
+
+            model = attempt_load_weights(
+                weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse
+            )
+            if hasattr(model, "kpt_shape"):
                 kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            names = model.module.names if hasattr(model, "module") else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif jit:  # TorchScript
-            LOGGER.info(f'Loading {w} for TorchScript inference...')
-            extra_files = {'config.txt': ''}  # model metadata
+            LOGGER.info(f"Loading {w} for TorchScript inference...")
+            extra_files = {"config.txt": ""}  # model metadata
             model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
             model.half() if fp16 else model.float()
-            if extra_files['config.txt']:  # load metadata dict
-                metadata = json.loads(extra_files['config.txt'], object_hook=lambda x: dict(x.items()))
+            if extra_files["config.txt"]:  # load metadata dict
+                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
         elif dnn:  # ONNX OpenCV DNN
-            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements('opencv-python>=4.5.4')
+            LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
+            check_requirements("opencv-python>=4.5.4")
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
-            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
+            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
+            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
             import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
+
+            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             metadata = session.get_modelmeta().custom_metadata_map  # metadata
         elif xml:  # OpenVINO
-            LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements('openvino>=2023.0')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            LOGGER.info(f"Loading {w} for OpenVINO inference...")
+            check_requirements("openvino>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
             from openvino.runtime import Core, Layout, get_batch  # noqa
+
             core = Core()
             w = Path(w)
             if not w.is_file():  # if not *.xml
-                w = next(w.glob('*.xml'))  # get *.xml file from *_openvino_model dir
-            ov_model = core.read_model(model=str(w), weights=w.with_suffix('.bin'))
+                w = next(w.glob("*.xml"))  # get *.xml file from *_openvino_model dir
+            ov_model = core.read_model(model=str(w), weights=w.with_suffix(".bin"))
             if ov_model.get_parameters()[0].get_layout().empty:
-                ov_model.get_parameters()[0].set_layout(Layout('NCHW'))
+                ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
             batch_dim = get_batch(ov_model)
             if batch_dim.is_static:
                 batch_size = batch_dim.get_length()
-            ov_compiled_model = core.compile_model(ov_model, device_name='AUTO')  # AUTO selects best available device
-            metadata = w.parent / 'metadata.yaml'
+            ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
+            metadata = w.parent / "metadata.yaml"
         elif engine:  # TensorRT
-            LOGGER.info(f'Loading {w} for TensorRT inference...')
+            LOGGER.info(f"Loading {w} for TensorRT inference...")
             try:
                 import tensorrt as trt  # noqa https://developer.nvidia.com/nvidia-tensorrt-download
             except ImportError:
                 if LINUX:
-                    check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
+                    check_requirements("nvidia-tensorrt", cmds="-U --index-url https://pypi.ngc.nvidia.com")
                 import tensorrt as trt  # noqa
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
-            if device.type == 'cpu':
-                device = torch.device('cuda:0')
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            check_version(trt.__version__, "7.0.0", hard=True)  # require tensorrt>=7.0.0
+            if device.type == "cpu":
+                device = torch.device("cuda:0")
+            Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
             logger = trt.Logger(trt.Logger.INFO)
             # Read file
-            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
-                meta_len = int.from_bytes(f.read(4), byteorder='little')  # read metadata length
-                metadata = json.loads(f.read(meta_len).decode('utf-8'))  # read metadata
+            with open(w, "rb") as f, trt.Runtime(logger) as runtime:
+                meta_len = int.from_bytes(f.read(4), byteorder="little")  # read metadata length
+                metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
                 model = runtime.deserialize_cuda_engine(f.read())  # read engine
             context = model.create_execution_context()
             bindings = OrderedDict()
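
Note: every branch above feeds the same `AutoBackend` interface; a hedged usage sketch (assumes `yolov8n.pt` is available locally):

```python
import torch

from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend("yolov8n.pt", device=torch.device("cpu"), fp16=False, fuse=True)
model.warmup(imgsz=(1, 3, 640, 640))  # dummy forward pass(es)
y = model(torch.zeros(1, 3, 640, 640))  # inference through whichever backend was selected
```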
@@ -213,116 +233,124 @@ class AutoBackend(nn.Module):
                 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                 bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
             binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+            batch_size = bindings["images"].shape[0]  # if dynamic, this is instead max batch size
         elif coreml:  # CoreML
-            LOGGER.info(f'Loading {w} for CoreML inference...')
+            LOGGER.info(f"Loading {w} for CoreML inference...")
             import coremltools as ct
+
             model = ct.models.MLModel(w)
             metadata = dict(model.user_defined_metadata)
         elif saved_model:  # TF SavedModel
-            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
+            LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
             import tensorflow as tf
+
             keras = False  # assume TF1 saved_model
             model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
-            metadata = Path(w) / 'metadata.yaml'
+            metadata = Path(w) / "metadata.yaml"
         elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
+            LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
             import tensorflow as tf
+
             from ultralytics.engine.exporter import gd_outputs
 
             def wrap_frozen_graph(gd, inputs, outputs):
                 """Wrap frozen graphs for deployment."""
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), [])  # wrapped
+                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                 ge = x.graph.as_graph_element
                 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
 
             gd = tf.Graph().as_graph_def()  # TF GraphDef
-            with open(w, 'rb') as f:
+            with open(w, "rb") as f:
                 gd.ParseFromString(f.read())
-            frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
+            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
         elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
             try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                 from tflite_runtime.interpreter import Interpreter, load_delegate
             except ImportError:
                 import tensorflow as tf
 
                 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
             if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
-                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
-                delegate = {
-                    'Linux': 'libedgetpu.so.1',
-                    'Darwin': 'libedgetpu.1.dylib',
-                    'Windows': 'edgetpu.dll'}[platform.system()]
+                LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
+                delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
+                    platform.system()
+                ]
                 interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
             else:  # TFLite
-                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
+                LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
                 interpreter = Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate
             input_details = interpreter.get_input_details()  # inputs
             output_details = interpreter.get_output_details()  # outputs
             # Load metadata
             with contextlib.suppress(zipfile.BadZipFile):
-                with zipfile.ZipFile(w, 'r') as model:
+                with zipfile.ZipFile(w, "r") as model:
                     meta_file = model.namelist()[0]
-                    metadata = ast.literal_eval(model.read(meta_file).decode('utf-8'))
+                    metadata = ast.literal_eval(model.read(meta_file).decode("utf-8"))
         elif tfjs:  # TF.js
-            raise NotImplementedError('YOLOv8 TF.js inference is not currently supported.')
+            raise NotImplementedError("YOLOv8 TF.js inference is not currently supported.")
         elif paddle:  # PaddlePaddle
-            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
-            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
+            LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
+            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle")
             import paddle.inference as pdi  # noqa
+
             w = Path(w)
             if not w.is_file():  # if not *.pdmodel
-                w = next(w.rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
-            config = pdi.Config(str(w), str(w.with_suffix('.pdiparams')))
+                w = next(w.rglob("*.pdmodel"))  # get *.pdmodel file from *_paddle_model dir
+            config = pdi.Config(str(w), str(w.with_suffix(".pdiparams")))
             if cuda:
                 config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
             predictor = pdi.create_predictor(config)
             input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
             output_names = predictor.get_output_names()
-            metadata = w.parents[1] / 'metadata.yaml'
+            metadata = w.parents[1] / "metadata.yaml"
         elif ncnn:  # ncnn
-            LOGGER.info(f'Loading {w} for ncnn inference...')
-            check_requirements('git+https://github.com/Tencent/ncnn.git' if ARM64 else 'ncnn')  # requires ncnn
+            LOGGER.info(f"Loading {w} for ncnn inference...")
+            check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn")  # requires ncnn
             import ncnn as pyncnn
+
             net = pyncnn.Net()
             net.opt.use_vulkan_compute = cuda
             w = Path(w)
             if not w.is_file():  # if not *.param
-                w = next(w.glob('*.param'))  # get *.param file from *_ncnn_model dir
+                w = next(w.glob("*.param"))  # get *.param file from *_ncnn_model dir
             net.load_param(str(w))
-            net.load_model(str(w.with_suffix('.bin')))
-            metadata = w.parent / 'metadata.yaml'
+            net.load_model(str(w.with_suffix(".bin")))
+            metadata = w.parent / "metadata.yaml"
         elif triton:  # NVIDIA Triton Inference Server
-            check_requirements('tritonclient[all]')
+            check_requirements("tritonclient[all]")
             from ultralytics.utils.triton import TritonRemoteModel
+
             model = TritonRemoteModel(w)
         else:
             from ultralytics.engine.exporter import export_formats
-            raise TypeError(f"model='{w}' is not a supported model format. "
-                            'See https://docs.ultralytics.com/modes/predict for help.'
-                            f'\n\n{export_formats()}')
+
+            raise TypeError(
+                f"model='{w}' is not a supported model format. "
+                "See https://docs.ultralytics.com/modes/predict for help."
+                f"\n\n{export_formats()}"
+            )
 
         # Load external metadata YAML
         if isinstance(metadata, (str, Path)) and Path(metadata).exists():
             metadata = yaml_load(metadata)
         if metadata:
             for k, v in metadata.items():
-                if k in ('stride', 'batch'):
+                if k in ("stride", "batch"):
                     metadata[k] = int(v)
-                elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
+                elif k in ("imgsz", "names", "kpt_shape") and isinstance(v, str):
                     metadata[k] = eval(v)
-            stride = metadata['stride']
-            task = metadata['task']
-            batch = metadata['batch']
-            imgsz = metadata['imgsz']
-            names = metadata['names']
-            kpt_shape = metadata.get('kpt_shape')
+            stride = metadata["stride"]
+            task = metadata["task"]
+            batch = metadata["batch"]
+            imgsz = metadata["imgsz"]
+            names = metadata["names"]
+            kpt_shape = metadata.get("kpt_shape")
         elif not (pt or triton or nn_module):
             LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
 
         # Check names
-        if 'names' not in locals():  # names missing
+        if "names" not in locals():  # names missing
             names = default_class_names(data)
         names = check_class_names(names)
 
@@ -367,26 +395,28 @@ class AutoBackend(nn.Module):
             im = im.cpu().numpy()  # FP32
             y = list(self.ov_compiled_model(im).values())
         elif self.engine:  # TensorRT
-            if self.dynamic and im.shape != self.bindings['images'].shape:
-                i = self.model.get_binding_index('images')
+            if self.dynamic and im.shape != self.bindings["images"].shape:
+                i = self.model.get_binding_index("images")
                 self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
-                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
+                self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                 for name in self.output_names:
                     i = self.model.get_binding_index(name)
                     self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
-            s = self.bindings['images'].shape
+            s = self.bindings["images"].shape
             assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
-            self.binding_addrs['images'] = int(im.data_ptr())
+            self.binding_addrs["images"] = int(im.data_ptr())
             self.context.execute_v2(list(self.binding_addrs.values()))
             y = [self.bindings[x].data for x in sorted(self.output_names)]
         elif self.coreml:  # CoreML
             im = im[0].cpu().numpy()
-            im_pil = Image.fromarray((im * 255).astype('uint8'))
+            im_pil = Image.fromarray((im * 255).astype("uint8"))
             # im = im.resize((192, 320), Image.BILINEAR)
-            y = self.model.predict({'image': im_pil})  # coordinates are xywh normalized
-            if 'confidence' in y:
-                raise TypeError('Ultralytics only supports inference of non-pipelined CoreML models exported with '
-                                f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export.")
+            y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
+            if "confidence" in y:
+                raise TypeError(
+                    "Ultralytics only supports inference of non-pipelined CoreML models exported with "
+                    f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export."
+                )
             # TODO: CoreML NMS inference handling
             # from ultralytics.utils.ops import xywh2xyxy
             # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
@@ -425,20 +455,20 @@ class AutoBackend(nn.Module):
                 if len(y) == 2 and len(self.names) == 999:  # segments and names not defined
                     ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0)  # index of protos, boxes
                     nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
-                    self.names = {i: f'class{i}' for i in range(nc)}
+                    self.names = {i: f"class{i}" for i in range(nc)}
             else:  # Lite or Edge TPU
                 details = self.input_details[0]
-                integer = details['dtype'] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
+                integer = details["dtype"] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
                 if integer:
-                    scale, zero_point = details['quantization']
-                    im = (im / scale + zero_point).astype(details['dtype'])  # de-scale
-                self.interpreter.set_tensor(details['index'], im)
+                    scale, zero_point = details["quantization"]
+                    im = (im / scale + zero_point).astype(details["dtype"])  # de-scale
+                self.interpreter.set_tensor(details["index"], im)
                 self.interpreter.invoke()
                 y = []
                 for output in self.output_details:
-                    x = self.interpreter.get_tensor(output['index'])
+                    x = self.interpreter.get_tensor(output["index"])
                     if integer:
-                        scale, zero_point = output['quantization']
+                        scale, zero_point = output["quantization"]
                         x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                     if x.ndim > 2:  # if task is not classification
                         # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
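
Note: the de-scale/re-scale pair above is plain affine int8 quantization. A NumPy sketch with assumed scale/zero-point values (not read from any real model):

```python
import numpy as np

scale, zero_point = 1 / 255, -128  # hypothetical int8 quantization parameters
im = np.random.rand(1, 640, 640, 3).astype(np.float32)  # float input in [0, 1)

q = (im / scale + zero_point).astype(np.int8)    # de-scale: float -> int8, as before set_tensor()
x = (q.astype(np.float32) - zero_point) * scale  # re-scale: int8 -> float, as after get_tensor()
assert np.allclose(x, im, atol=scale)  # round-trip error bounded by one quantization step
```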
@@ -483,13 +513,13 @@ class AutoBackend(nn.Module):
             (None): This method runs the forward pass and don't return any value
         """
         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
-        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
+        if any(warmup_types) and (self.device.type != "cpu" or self.triton):
             im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
             for _ in range(2 if self.jit else 1):
                 self.forward(im)  # warmup
 
     @staticmethod
-    def _model_type(p='path/to/model.pt'):
+    def _model_type(p="path/to/model.pt"):
         """
         This function takes a path to a model file and returns the model type.
 
@@ -499,18 +529,20 @@ class AutoBackend(nn.Module):
         # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
         # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
         from ultralytics.engine.exporter import export_formats
+
         sf = list(export_formats().Suffix)  # export suffixes
         if not is_url(p, check=False) and not isinstance(p, str):
             check_suffix(p, sf)  # checks
         name = Path(p).name
         types = [s in name for s in sf]
-        types[5] |= name.endswith('.mlmodel')  # retain support for older Apple CoreML *.mlmodel formats
+        types[5] |= name.endswith(".mlmodel")  # retain support for older Apple CoreML *.mlmodel formats
         types[8] &= not types[9]  # tflite &= not edgetpu
         if any(types):
             triton = False
         else:
             from urllib.parse import urlsplit
+
             url = urlsplit(p)
-            triton = url.netloc and url.path and url.scheme in {'http', 'grpc'}
+            triton = url.netloc and url.path and url.scheme in {"http", "grpc"}
 
         return types + [triton]
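
Note: when no export suffix matches, the path is tested as a potential Triton URL. The check in isolation, with a hypothetical endpoint (not from this diff):

```python
from urllib.parse import urlsplit

url = urlsplit("http://localhost:8000/yolov8n")
triton = bool(url.netloc and url.path and url.scheme in {"http", "grpc"})
print(triton)  # True: netloc='localhost:8000', path='/yolov8n', scheme='http'
```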
@@ -17,18 +17,101 @@ Example:
     ```
 """
 
-from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
-                    HGBlock, HGStem, Proto, RepC3, ResNetLayer)
-from .conv import (CBAM, ChannelAttention, Concat, Conv, Conv2, ConvTranspose, DWConv, DWConvTranspose2d, Focus,
-                   GhostConv, LightConv, RepConv, SpatialAttention)
+from .block import (
+    C1,
+    C2,
+    C3,
+    C3TR,
+    DFL,
+    SPP,
+    SPPF,
+    Bottleneck,
+    BottleneckCSP,
+    C2f,
+    C3Ghost,
+    C3x,
+    GhostBottleneck,
+    HGBlock,
+    HGStem,
+    Proto,
+    RepC3,
+    ResNetLayer,
+)
+from .conv import (
+    CBAM,
+    ChannelAttention,
+    Concat,
+    Conv,
+    Conv2,
+    ConvTranspose,
+    DWConv,
+    DWConvTranspose2d,
+    Focus,
+    GhostConv,
+    LightConv,
+    RepConv,
+    SpatialAttention,
+)
 from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment
-from .transformer import (AIFI, MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer, LayerNorm2d,
-                          MLPBlock, MSDeformAttn, TransformerBlock, TransformerEncoderLayer, TransformerLayer)
+from .transformer import (
+    AIFI,
+    MLP,
+    DeformableTransformerDecoder,
+    DeformableTransformerDecoderLayer,
+    LayerNorm2d,
+    MLPBlock,
+    MSDeformAttn,
+    TransformerBlock,
+    TransformerEncoderLayer,
+    TransformerLayer,
+)
 
-__all__ = ('Conv', 'Conv2', 'LightConv', 'RepConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus',
-           'GhostConv', 'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'TransformerLayer',
-           'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3',
-           'C2f', 'C3x', 'C3TR', 'C3Ghost', 'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'Detect',
-           'Segment', 'Pose', 'Classify', 'TransformerEncoderLayer', 'RepC3', 'RTDETRDecoder', 'AIFI',
-           'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP', 'ResNetLayer',
-           'OBB')
+__all__ = (
+    "Conv",
+    "Conv2",
+    "LightConv",
+    "RepConv",
+    "DWConv",
+    "DWConvTranspose2d",
+    "ConvTranspose",
+    "Focus",
+    "GhostConv",
+    "ChannelAttention",
+    "SpatialAttention",
+    "CBAM",
+    "Concat",
+    "TransformerLayer",
+    "TransformerBlock",
+    "MLPBlock",
+    "LayerNorm2d",
+    "DFL",
+    "HGBlock",
+    "HGStem",
+    "SPP",
+    "SPPF",
+    "C1",
+    "C2",
+    "C3",
+    "C2f",
+    "C3x",
+    "C3TR",
+    "C3Ghost",
+    "GhostBottleneck",
+    "Bottleneck",
+    "BottleneckCSP",
+    "Proto",
+    "Detect",
+    "Segment",
+    "Pose",
+    "Classify",
+    "TransformerEncoderLayer",
+    "RepC3",
+    "RTDETRDecoder",
+    "AIFI",
+    "DeformableTransformerDecoder",
+    "DeformableTransformerDecoderLayer",
+    "MSDeformAttn",
+    "MLP",
+    "ResNetLayer",
+    "OBB",
+)
@@ -8,8 +8,26 @@ import torch.nn.functional as F
 from .conv import Conv, DWConv, GhostConv, LightConv, RepConv
 from .transformer import TransformerBlock
 
-__all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', 'C3x', 'C3TR', 'C3Ghost',
-           'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'RepC3', 'ResNetLayer')
+__all__ = (
+    "DFL",
+    "HGBlock",
+    "HGStem",
+    "SPP",
+    "SPPF",
+    "C1",
+    "C2",
+    "C3",
+    "C2f",
+    "C3x",
+    "C3TR",
+    "C3Ghost",
+    "GhostBottleneck",
+    "Bottleneck",
+    "BottleneckCSP",
+    "Proto",
+    "RepC3",
+    "ResNetLayer",
+)
 
 
 class DFL(nn.Module):
@@ -284,9 +302,11 @@ class GhostBottleneck(nn.Module):
         self.conv = nn.Sequential(
             GhostConv(c1, c_, 1, 1),  # pw
             DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
-            GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
-        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
-                                                                            act=False)) if s == 2 else nn.Identity()
+            GhostConv(c_, c2, 1, 1, act=False),  # pw-linear
+        )
+        self.shortcut = (
+            nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
+        )
 
     def forward(self, x):
         """Applies skip connection and concatenation to input tensor."""
@@ -359,8 +379,9 @@ class ResNetLayer(nn.Module):
         self.is_first = is_first
 
         if self.is_first:
-            self.layer = nn.Sequential(Conv(c1, c2, k=7, s=2, p=3, act=True),
-                                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
+            self.layer = nn.Sequential(
+                Conv(c1, c2, k=7, s=2, p=3, act=True), nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+            )
         else:
             blocks = [ResNetBlock(c1, c2, s, e=e)]
             blocks.extend([ResNetBlock(e * c2, c2, 1, e=e) for _ in range(n - 1)])
@@ -7,8 +7,21 @@ import numpy as np
 import torch
 import torch.nn as nn
 
-__all__ = ('Conv', 'Conv2', 'LightConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus', 'GhostConv',
-           'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'RepConv')
+__all__ = (
+    "Conv",
+    "Conv2",
+    "LightConv",
+    "DWConv",
+    "DWConvTranspose2d",
+    "ConvTranspose",
+    "Focus",
+    "GhostConv",
+    "ChannelAttention",
+    "SpatialAttention",
+    "CBAM",
+    "Concat",
+    "RepConv",
+)
 
 
 def autopad(k, p=None, d=1):  # kernel, padding, dilation
@@ -22,6 +35,7 @@ def autopad(k, p=None, d=1):  # kernel, padding, dilation
 
 class Conv(nn.Module):
     """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
+
     default_act = nn.SiLU()  # default activation
 
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
@@ -60,9 +74,9 @@ class Conv2(Conv):
         """Fuse parallel convolutions."""
         w = torch.zeros_like(self.conv.weight.data)
         i = [x // 2 for x in w.shape[2:]]
-        w[:, :, i[0]:i[0] + 1, i[1]:i[1] + 1] = self.cv2.weight.data.clone()
+        w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
         self.conv.weight.data += w
-        self.__delattr__('cv2')
+        self.__delattr__("cv2")
         self.forward = self.forward_fuse
 
 
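
Note: the slice above writes the parallel 1x1 kernel into the center of the 3x3 kernel, so the two branches collapse into one convolution. A standalone check of that identity with random weights (toy shapes assumed):

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 8, 8)
w3 = torch.randn(8, 4, 3, 3)  # 3x3 branch
w1 = torch.randn(8, 4, 1, 1)  # parallel 1x1 branch

w = w3.clone()
w[:, :, 1:2, 1:2] += w1  # embed the 1x1 kernel at the 3x3 center, as in fuse_convs()
fused = F.conv2d(x, w, padding=1)
two_branch = F.conv2d(x, w3, padding=1) + F.conv2d(x, w1)
assert torch.allclose(fused, two_branch, atol=1e-5)
```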
@@ -102,6 +116,7 @@ class DWConvTranspose2d(nn.ConvTranspose2d):
 
 class ConvTranspose(nn.Module):
     """Convolution transpose 2d layer."""
+
     default_act = nn.SiLU()  # default activation
 
     def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
@@ -164,6 +179,7 @@ class RepConv(nn.Module):
     This module is used in RT-DETR.
     Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
     """
+
     default_act = nn.SiLU()  # default activation
 
     def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
@@ -214,7 +230,7 @@ class RepConv(nn.Module):
             beta = branch.bn.bias
             eps = branch.bn.eps
         elif isinstance(branch, nn.BatchNorm2d):
-            if not hasattr(self, 'id_tensor'):
+            if not hasattr(self, "id_tensor"):
                 input_dim = self.c1 // self.g
                 kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                 for i in range(self.c1):
@@ -232,29 +248,31 @@ class RepConv(nn.Module):
 
     def fuse_convs(self):
         """Combines two convolution layers into a single layer and removes unused attributes from the class."""
-        if hasattr(self, 'conv'):
+        if hasattr(self, "conv"):
             return
         kernel, bias = self.get_equivalent_kernel_bias()
-        self.conv = nn.Conv2d(in_channels=self.conv1.conv.in_channels,
-                              out_channels=self.conv1.conv.out_channels,
-                              kernel_size=self.conv1.conv.kernel_size,
-                              stride=self.conv1.conv.stride,
-                              padding=self.conv1.conv.padding,
-                              dilation=self.conv1.conv.dilation,
-                              groups=self.conv1.conv.groups,
-                              bias=True).requires_grad_(False)
+        self.conv = nn.Conv2d(
+            in_channels=self.conv1.conv.in_channels,
+            out_channels=self.conv1.conv.out_channels,
+            kernel_size=self.conv1.conv.kernel_size,
+            stride=self.conv1.conv.stride,
+            padding=self.conv1.conv.padding,
+            dilation=self.conv1.conv.dilation,
+            groups=self.conv1.conv.groups,
+            bias=True,
+        ).requires_grad_(False)
         self.conv.weight.data = kernel
         self.conv.bias.data = bias
         for para in self.parameters():
             para.detach_()
-        self.__delattr__('conv1')
-        self.__delattr__('conv2')
-        if hasattr(self, 'nm'):
-            self.__delattr__('nm')
-        if hasattr(self, 'bn'):
-            self.__delattr__('bn')
-        if hasattr(self, 'id_tensor'):
-            self.__delattr__('id_tensor')
+        self.__delattr__("conv1")
+        self.__delattr__("conv2")
+        if hasattr(self, "nm"):
+            self.__delattr__("nm")
+        if hasattr(self, "bn"):
+            self.__delattr__("bn")
+        if hasattr(self, "id_tensor"):
+            self.__delattr__("id_tensor")
 
 
 class ChannelAttention(nn.Module):
@@ -278,7 +296,7 @@ class SpatialAttention(nn.Module):
     def __init__(self, kernel_size=7):
         """Initialize Spatial-attention module with kernel size argument."""
         super().__init__()
-        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
         padding = 3 if kernel_size == 7 else 1
         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
         self.act = nn.Sigmoid()
@@ -14,11 +14,12 @@ from .conv import Conv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init_
 
-__all__ = 'Detect', 'Segment', 'Pose', 'Classify', 'OBB', 'RTDETRDecoder'
+__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
 
 
 class Detect(nn.Module):
     """YOLOv8 Detect head for detection models."""
+
     dynamic = False  # force grid reconstruction
     export = False  # export mode
     shape = None
@@ -35,7 +36,8 @@ class Detect(nn.Module):
         self.stride = torch.zeros(self.nl)  # strides computed during build
         c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100))  # channels
         self.cv2 = nn.ModuleList(
-            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
+            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
+        )
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
 
@@ -53,14 +55,14 @@ class Detect(nn.Module):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
-            box = x_cat[:, :self.reg_max * 4]
-            cls = x_cat[:, self.reg_max * 4:]
+        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+            box = x_cat[:, : self.reg_max * 4]
+            cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
         dbox = self.decode_bboxes(box)
 
-        if self.export and self.format in ('tflite', 'edgetpu'):
+        if self.export and self.format in ("tflite", "edgetpu"):
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             img_h = shape[2]
@@ -79,7 +81,7 @@ class Detect(nn.Module):
         # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
         for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
             a[-1].bias.data[:] = 1.0  # box
-            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 
     def decode_bboxes(self, bboxes):
         """Decode bounding boxes."""
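
Note: the class-bias initialization above encodes a prior of roughly 0.01 objects per 640-pixel image spread over `nc` classes. Checking the magnitude for illustrative values (nc=80, stride 32 assumed):

```python
import math

nc, s = 80, 32
b = math.log(5 / nc / (640 / s) ** 2)  # log(5 / 80 / 400) ≈ -8.76
print(f"{b:.2f}")  # sigmoid(-8.76) ≈ 1.6e-4, i.e. near-zero initial class confidence
```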
@@ -214,26 +216,28 @@ class RTDETRDecoder(nn.Module):
     and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
     Transformer decoder layers to output the final predictions.
     """
+
     export = False  # export mode
 
     def __init__(
-            self,
-            nc=80,
-            ch=(512, 1024, 2048),
-            hd=256,  # hidden dim
-            nq=300,  # num queries
-            ndp=4,  # num decoder points
-            nh=8,  # num head
-            ndl=6,  # num decoder layers
-            d_ffn=1024,  # dim of feedforward
-            dropout=0.,
-            act=nn.ReLU(),
-            eval_idx=-1,
-            # Training args
-            nd=100,  # num denoising
-            label_noise_ratio=0.5,
-            box_noise_scale=1.0,
-            learnt_init_query=False):
+        self,
+        nc=80,
+        ch=(512, 1024, 2048),
+        hd=256,  # hidden dim
+        nq=300,  # num queries
+        ndp=4,  # num decoder points
+        nh=8,  # num head
+        ndl=6,  # num decoder layers
+        d_ffn=1024,  # dim of feedforward
+        dropout=0.0,
+        act=nn.ReLU(),
+        eval_idx=-1,
+        # Training args
+        nd=100,  # num denoising
+        label_noise_ratio=0.5,
+        box_noise_scale=1.0,
+        learnt_init_query=False,
+    ):
         """
         Initializes the RTDETRDecoder module with the given parameters.
 
@@ -302,28 +306,30 @@ class RTDETRDecoder(nn.Module):
         feats, shapes = self._get_encoder_input(x)
 
         # Prepare denoising training
-        dn_embed, dn_bbox, attn_mask, dn_meta = \
-            get_cdn_group(batch,
-                          self.nc,
-                          self.num_queries,
-                          self.denoising_class_embed.weight,
-                          self.num_denoising,
-                          self.label_noise_ratio,
-                          self.box_noise_scale,
-                          self.training)
+        dn_embed, dn_bbox, attn_mask, dn_meta = get_cdn_group(
+            batch,
+            self.nc,
+            self.num_queries,
+            self.denoising_class_embed.weight,
+            self.num_denoising,
+            self.label_noise_ratio,
+            self.box_noise_scale,
+            self.training,
+        )
 
-        embed, refer_bbox, enc_bboxes, enc_scores = \
-            self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)
+        embed, refer_bbox, enc_bboxes, enc_scores = self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)
 
         # Decoder
-        dec_bboxes, dec_scores = self.decoder(embed,
-                                              refer_bbox,
-                                              feats,
-                                              shapes,
-                                              self.dec_bbox_head,
-                                              self.dec_score_head,
-                                              self.query_pos_head,
-                                              attn_mask=attn_mask)
+        dec_bboxes, dec_scores = self.decoder(
+            embed,
+            refer_bbox,
+            feats,
+            shapes,
+            self.dec_bbox_head,
+            self.dec_score_head,
+            self.query_pos_head,
+            attn_mask=attn_mask,
+        )
         x = dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta
         if self.training:
             return x
@@ -331,24 +337,24 @@ class RTDETRDecoder(nn.Module):
         y = torch.cat((dec_bboxes.squeeze(0), dec_scores.squeeze(0).sigmoid()), -1)
         return y if self.export else (y, x)
 
-    def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
+    def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device="cpu", eps=1e-2):
         """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
         anchors = []
         for i, (h, w) in enumerate(shapes):
             sy = torch.arange(end=h, dtype=dtype, device=device)
             sx = torch.arange(end=w, dtype=dtype, device=device)
-            grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
             grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)
 
             valid_WH = torch.tensor([w, h], dtype=dtype, device=device)
             grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH  # (1, h, w, 2)
-            wh = torch.ones_like(grid_xy, dtype=dtype, device=device) * grid_size * (2.0 ** i)
+            wh = torch.ones_like(grid_xy, dtype=dtype, device=device) * grid_size * (2.0**i)
             anchors.append(torch.cat([grid_xy, wh], -1).view(-1, h * w, 4))  # (1, h*w, 4)
 
         anchors = torch.cat(anchors, 1)  # (1, h*w*nl, 4)
         valid_mask = ((anchors > eps) * (anchors < 1 - eps)).all(-1, keepdim=True)  # 1, h*w*nl, 1
         anchors = torch.log(anchors / (1 - anchors))
-        anchors = anchors.masked_fill(~valid_mask, float('inf'))
+        anchors = anchors.masked_fill(~valid_mask, float("inf"))
         return anchors, valid_mask
 
     def _get_encoder_input(self, x):
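
Note: `torch.log(anchors / (1 - anchors))` is the inverse sigmoid, so anchors are stored as logits and the decoder's sigmoid recovers normalized coordinates; out-of-range anchors are pushed to `inf` and masked. A small round-trip sketch:

```python
import torch

a = torch.tensor([0.25, 0.5, 0.999])  # normalized anchor coordinates in (0, 1)
logits = torch.log(a / (1 - a))       # inverse sigmoid, as in _generate_anchors()
assert torch.allclose(torch.sigmoid(logits), a)  # sigmoid round-trips to the anchors
```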
@@ -415,13 +421,13 @@ class RTDETRDecoder(nn.Module):
         # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
         # linear_init_(self.enc_score_head)
         constant_(self.enc_score_head.bias, bias_cls)
-        constant_(self.enc_bbox_head.layers[-1].weight, 0.)
-        constant_(self.enc_bbox_head.layers[-1].bias, 0.)
+        constant_(self.enc_bbox_head.layers[-1].weight, 0.0)
+        constant_(self.enc_bbox_head.layers[-1].bias, 0.0)
         for cls_, reg_ in zip(self.dec_score_head, self.dec_bbox_head):
             # linear_init_(cls_)
             constant_(cls_.bias, bias_cls)
-            constant_(reg_.layers[-1].weight, 0.)
-            constant_(reg_.layers[-1].bias, 0.)
+            constant_(reg_.layers[-1].weight, 0.0)
+            constant_(reg_.layers[-1].bias, 0.0)
 
         linear_init_(self.enc_output[0])
         xavier_uniform_(self.enc_output[0].weight)
@@ -11,8 +11,18 @@ from torch.nn.init import constant_, xavier_uniform_
 from .conv import Conv
 from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch
 
-__all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'AIFI',
-           'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP')
+__all__ = (
+    "TransformerEncoderLayer",
+    "TransformerLayer",
+    "TransformerBlock",
+    "MLPBlock",
+    "LayerNorm2d",
+    "AIFI",
+    "DeformableTransformerDecoder",
+    "DeformableTransformerDecoderLayer",
+    "MSDeformAttn",
+    "MLP",
+)
 
 
 class TransformerEncoderLayer(nn.Module):
@@ -22,9 +32,11 @@ class TransformerEncoderLayer(nn.Module):
         """Initialize the TransformerEncoderLayer with specified parameters."""
         super().__init__()
         from ...utils.torch_utils import TORCH_1_9
+
         if not TORCH_1_9:
             raise ModuleNotFoundError(
-                'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).')
+                "TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True)."
+            )
         self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
         # Implementation of Feedforward model
         self.fc1 = nn.Linear(c1, cm)
@@ -91,12 +103,11 @@ class AIFI(TransformerEncoderLayer):
         """Builds 2D sine-cosine position embedding."""
         grid_w = torch.arange(int(w), dtype=torch.float32)
         grid_h = torch.arange(int(h), dtype=torch.float32)
-        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
-        assert embed_dim % 4 == 0, \
-            'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
+        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
+        assert embed_dim % 4 == 0, "Embed dimension must be divisible by 4 for 2D sin-cos position embedding"
         pos_dim = embed_dim // 4
         omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
-        omega = 1. / (temperature ** omega)
+        omega = 1.0 / (temperature**omega)
 
         out_w = grid_w.flatten()[..., None] @ omega[None]
         out_h = grid_h.flatten()[..., None] @ omega[None]
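
Note: the embedding concatenates sin/cos of `out_w` and `out_h`, which is why `embed_dim` must be divisible by 4. A shape-only sketch with assumed sizes (w=h=20, embed_dim=256):

```python
import torch

w = h = 20
embed_dim, temperature = 256, 10000.0
pos_dim = embed_dim // 4  # 64 frequencies per axis
omega = 1.0 / temperature ** (torch.arange(pos_dim, dtype=torch.float32) / pos_dim)
grid_w, grid_h = torch.meshgrid(torch.arange(w), torch.arange(h), indexing="ij")
out_w = grid_w.flatten().float()[..., None] @ omega[None]  # (400, 64)
out_h = grid_h.flatten().float()[..., None] @ omega[None]
pe = torch.cat([out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1)
print(pe.shape)  # torch.Size([400, 256]): one embedding per grid cell
```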
@@ -213,10 +224,10 @@ class MSDeformAttn(nn.Module):
         """Initialize MSDeformAttn with the given parameters."""
         super().__init__()
         if d_model % n_heads != 0:
-            raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
+            raise ValueError(f"d_model must be divisible by n_heads, but got {d_model} and {n_heads}")
         _d_per_head = d_model // n_heads
         # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation
-        assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'
+        assert _d_per_head * n_heads == d_model, "`d_model` must be divisible by `n_heads`"
 
         self.im2col_step = 64
 
@@ -234,21 +245,24 @@ class MSDeformAttn(nn.Module):
 
     def _reset_parameters(self):
         """Reset module parameters."""
-        constant_(self.sampling_offsets.weight.data, 0.)
+        constant_(self.sampling_offsets.weight.data, 0.0)
         thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
         grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
-        grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(
-            1, self.n_levels, self.n_points, 1)
+        grid_init = (
+            (grid_init / grid_init.abs().max(-1, keepdim=True)[0])
+            .view(self.n_heads, 1, 1, 2)
+            .repeat(1, self.n_levels, self.n_points, 1)
+        )
         for i in range(self.n_points):
             grid_init[:, :, i, :] *= i + 1
         with torch.no_grad():
             self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))
-        constant_(self.attention_weights.weight.data, 0.)
-        constant_(self.attention_weights.bias.data, 0.)
+        constant_(self.attention_weights.weight.data, 0.0)
+        constant_(self.attention_weights.bias.data, 0.0)
         xavier_uniform_(self.value_proj.weight.data)
-        constant_(self.value_proj.bias.data, 0.)
+        constant_(self.value_proj.bias.data, 0.0)
         xavier_uniform_(self.output_proj.weight.data)
-        constant_(self.output_proj.bias.data, 0.)
+        constant_(self.output_proj.bias.data, 0.0)
 
     def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
         """
@@ -288,7 +302,7 @@ class MSDeformAttn(nn.Module):
             add = sampling_offsets / self.n_points * refer_bbox[:, :, None, :, None, 2:] * 0.5
             sampling_locations = refer_bbox[:, :, None, :, None, :2] + add
         else:
-            raise ValueError(f'Last dim of reference_points must be 2 or 4, but got {num_points}.')
+            raise ValueError(f"Last dim of reference_points must be 2 or 4, but got {num_points}.")
         output = multi_scale_deformable_attn_pytorch(value, value_shapes, sampling_locations, attention_weights)
         return self.output_proj(output)
 
@@ -301,7 +315,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
     https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
     """
 
-    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
+    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0.0, act=nn.ReLU(), n_levels=4, n_points=4):
         """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
         super().__init__()
 
@@ -339,14 +353,16 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
         # Self attention
         q = k = self.with_pos_embed(embed, query_pos)
-        tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
-                             attn_mask=attn_mask)[0].transpose(0, 1)
+        tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask)[
+            0
+        ].transpose(0, 1)
         embed = embed + self.dropout1(tgt)
         embed = self.norm1(embed)
 
         # Cross attention
-        tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
-                              padding_mask)
+        tgt = self.cross_attn(
+            self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes, padding_mask
+        )
         embed = embed + self.dropout2(tgt)
         embed = self.norm2(embed)
 
@@ -370,16 +386,17 @@ class DeformableTransformerDecoder(nn.Module):
         self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx
 
     def forward(
-            self,
-            embed,  # decoder embeddings
-            refer_bbox,  # anchor
-            feats,  # image features
-            shapes,  # feature shapes
-            bbox_head,
-            score_head,
-            pos_mlp,
-            attn_mask=None,
-            padding_mask=None):
+        self,
+        embed,  # decoder embeddings
+        refer_bbox,  # anchor
+        feats,  # image features
+        shapes,  # feature shapes
+        bbox_head,
+        score_head,
+        pos_mlp,
+        attn_mask=None,
+        padding_mask=None,
+    ):
         """Perform the forward pass through the entire decoder."""
         output = embed
         dec_bboxes = []
@@ -10,7 +10,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import uniform_
 
-__all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'
+__all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid"
 
 
 def _get_clones(module, n):
@@ -27,7 +27,7 @@ def linear_init_(module):
     """Initialize the weights and biases of a linear module."""
     bound = 1 / math.sqrt(module.weight.shape[0])
     uniform_(module.weight, -bound, bound)
-    if hasattr(module, 'bias') and module.bias is not None:
+    if hasattr(module, "bias") and module.bias is not None:
         uniform_(module.bias, -bound, bound)
 
 
@@ -39,9 +39,12 @@ def inverse_sigmoid(x, eps=1e-5):
     return torch.log(x1 / x2)
 
 
-def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shapes: torch.Tensor,
-                                        sampling_locations: torch.Tensor,
-                                        attention_weights: torch.Tensor) -> torch.Tensor:
+def multi_scale_deformable_attn_pytorch(
+    value: torch.Tensor,
+    value_spatial_shapes: torch.Tensor,
+    sampling_locations: torch.Tensor,
+    attention_weights: torch.Tensor,
+) -> torch.Tensor:
     """
     Multi-scale deformable attention.
 
@@ -58,23 +61,25 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
         # bs, H_*W_, num_heads*embed_dims ->
         # bs, num_heads*embed_dims, H_*W_ ->
         # bs*num_heads, embed_dims, H_, W_
-        value_l_ = (value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_))
+        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_)
         # bs, num_queries, num_heads, num_points, 2 ->
         # bs, num_heads, num_queries, num_points, 2 ->
         # bs*num_heads, num_queries, num_points, 2
         sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1)
         # bs*num_heads, embed_dims, num_queries, num_points
-        sampling_value_l_ = F.grid_sample(value_l_,
-                                          sampling_grid_l_,
-                                          mode='bilinear',
-                                          padding_mode='zeros',
-                                          align_corners=False)
+        sampling_value_l_ = F.grid_sample(
+            value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False
+        )
         sampling_value_list.append(sampling_value_l_)
     # (bs, num_queries, num_heads, num_levels, num_points) ->
     # (bs, num_heads, num_queries, num_levels, num_points) ->
     # (bs, num_heads, 1, num_queries, num_levels*num_points)
-    attention_weights = attention_weights.transpose(1, 2).reshape(bs * num_heads, 1, num_queries,
-                                                                  num_levels * num_points)
-    output = ((torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(
-        bs, num_heads * embed_dims, num_queries))
+    attention_weights = attention_weights.transpose(1, 2).reshape(
+        bs * num_heads, 1, num_queries, num_levels * num_points
+    )
+    output = (
+        (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights)
+        .sum(-1)
+        .view(bs, num_heads * embed_dims, num_queries)
+    )
     return output.transpose(1, 2).contiguous()
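
Note: the reshapes exist to drive `F.grid_sample`, which expects NCHW input and an (N, H_out, W_out, 2) grid in [-1, 1]; queries map to H_out and sampling points to W_out. A shape-only sketch with toy sizes (assumed: bs=1, 8 heads, 32 dims, one 16x16 level, 100 queries, 4 points):

```python
import torch
import torch.nn.functional as F

bs, num_heads, embed_dims, H, W, num_queries, num_points = 1, 8, 32, 16, 16, 100, 4
value_l = torch.randn(bs * num_heads, embed_dims, H, W)                # per-level features, NCHW
grid = torch.rand(bs * num_heads, num_queries, num_points, 2) * 2 - 1  # sampling locations in [-1, 1]
sampled = F.grid_sample(value_l, grid, mode="bilinear", padding_mode="zeros", align_corners=False)
print(sampled.shape)  # torch.Size([8, 32, 100, 4]): embed_dims per query per sampling point
```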
@@ -7,16 +7,54 @@ from pathlib import Path
 import torch
 import torch.nn as nn
 
-from ultralytics.nn.modules import (AIFI, C1, C2, C3, C3TR, OBB, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost,
-                                    C3x, Classify, Concat, Conv, Conv2, ConvTranspose, Detect, DWConv,
-                                    DWConvTranspose2d, Focus, GhostBottleneck, GhostConv, HGBlock, HGStem, Pose, RepC3,
-                                    RepConv, ResNetLayer, RTDETRDecoder, Segment)
+from ultralytics.nn.modules import (
+    AIFI,
+    C1,
+    C2,
+    C3,
+    C3TR,
+    OBB,
+    SPP,
+    SPPF,
+    Bottleneck,
+    BottleneckCSP,
+    C2f,
+    C3Ghost,
+    C3x,
+    Classify,
+    Concat,
+    Conv,
+    Conv2,
+    ConvTranspose,
+    Detect,
+    DWConv,
+    DWConvTranspose2d,
+    Focus,
+    GhostBottleneck,
+    GhostConv,
+    HGBlock,
+    HGStem,
+    Pose,
+    RepC3,
+    RepConv,
+    ResNetLayer,
+    RTDETRDecoder,
+    Segment,
+)
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss
 from ultralytics.utils.plotting import feature_visualization
-from ultralytics.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights, intersect_dicts,
-                                           make_divisible, model_info, scale_img, time_sync)
+from ultralytics.utils.torch_utils import (
+    fuse_conv_and_bn,
+    fuse_deconv_and_bn,
+    initialize_weights,
+    intersect_dicts,
+    make_divisible,
+    model_info,
+    scale_img,
+    time_sync,
+)
 
 try:
     import thop
@@ -90,8 +128,10 @@ class BaseModel(nn.Module):

    def _predict_augment(self, x):
        """Perform augmentations on input image x and return augmented inference."""
        LOGGER.warning(f'WARNING ⚠️ {self.__class__.__name__} does not support augmented inference yet. '
                       f'Reverting to single-scale inference instead.')
        LOGGER.warning(
            f"WARNING ⚠️ {self.__class__.__name__} does not support augmented inference yet. "
            f"Reverting to single-scale inference instead."
        )
        return self._predict_once(x)

    def _profile_one_layer(self, m, x, dt):
@@ -108,14 +148,14 @@ class BaseModel(nn.Module):
            None
        """
        c = m == self.model[-1] and isinstance(x, list)  # is final layer list, copy input as inplace fix
        flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1e9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
        LOGGER.info(f'{dt[-1]:10.2f} {flops:10.2f} {m.np:10.0f} {m.type}')
        LOGGER.info(f"{dt[-1]:10.2f} {flops:10.2f} {m.np:10.0f} {m.type}")
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
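A minimal sketch of the per-layer FLOPs measurement used above, assuming the optional thop package is installed; the module and input here are illustrative:

import torch
import torch.nn as nn
import thop  # optional dependency, guarded by try/except in the file above

m = nn.Conv2d(3, 16, 3, padding=1)
x = torch.randn(1, 3, 64, 64)
flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1e9 * 2  # GFLOPs (MACs x 2)
print(f"{flops:.3f} GFLOPs")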
@@ -129,15 +169,15 @@ class BaseModel(nn.Module):
        """
        if not self.is_fused():
            for m in self.model.modules():
                if isinstance(m, (Conv, Conv2, DWConv)) and hasattr(m, 'bn'):
                if isinstance(m, (Conv, Conv2, DWConv)) and hasattr(m, "bn"):
                    if isinstance(m, Conv2):
                        m.fuse_convs()
                    m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                    delattr(m, 'bn')  # remove batchnorm
                    delattr(m, "bn")  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, ConvTranspose) and hasattr(m, 'bn'):
                if isinstance(m, ConvTranspose) and hasattr(m, "bn"):
                    m.conv_transpose = fuse_deconv_and_bn(m.conv_transpose, m.bn)
                    delattr(m, 'bn')  # remove batchnorm
                    delattr(m, "bn")  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, RepConv):
                    m.fuse_convs()
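For context, a self-contained sketch of the math that Conv+BN fusion performs (assumed equivalent to fuse_conv_and_bn, not the library function itself): eval-mode BatchNorm's affine transform is folded into the conv weights and bias.

import torch
import torch.nn as nn

def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold eval-mode BatchNorm statistics into a new Conv2d (sketch, not the ultralytics helper)."""
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size, conv.stride, conv.padding, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)  # per-channel gain
    with torch.no_grad():
        fused.weight.copy_(conv.weight * scale.reshape(-1, 1, 1, 1))
        b = conv.bias if conv.bias is not None else torch.zeros(conv.out_channels)
        fused.bias.copy_((b - bn.running_mean) * scale + bn.bias)
    return fused

conv, bn = nn.Conv2d(3, 8, 3, padding=1, bias=False), nn.BatchNorm2d(8).eval()
x = torch.randn(1, 3, 16, 16)
assert torch.allclose(bn(conv(x)), fuse_conv_bn(conv, bn)(x), atol=1e-5)  # same output, one layer fewer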
@@ -156,7 +196,7 @@ class BaseModel(nn.Module):
        Returns:
            (bool): True if the number of BatchNorm layers in the model is less than the threshold, False otherwise.
        """
        bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
        bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
        return sum(isinstance(v, bn) for v in self.modules()) < thresh  # True if < 'thresh' BatchNorm layers in model

    def info(self, detailed=False, verbose=True, imgsz=640):
@@ -196,12 +236,12 @@ class BaseModel(nn.Module):
            weights (dict | torch.nn.Module): The pre-trained weights to be loaded.
            verbose (bool, optional): Whether to log the transfer progress. Defaults to True.
        """
        model = weights['model'] if isinstance(weights, dict) else weights  # torchvision models are not dicts
        model = weights["model"] if isinstance(weights, dict) else weights  # torchvision models are not dicts
        csd = model.float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, self.state_dict())  # intersect
        self.load_state_dict(csd, strict=False)  # load
        if verbose:
            LOGGER.info(f'Transferred {len(csd)}/{len(self.model.state_dict())} items from pretrained weights')
            LOGGER.info(f"Transferred {len(csd)}/{len(self.model.state_dict())} items from pretrained weights")

    def loss(self, batch, preds=None):
        """
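The load() method above transfers only the checkpoint entries that intersect the current model. A minimal sketch of that pattern with toy modules (intersect_dicts is approximated inline):

import torch.nn as nn

src = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
dst = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 3))  # final layer differs

dsd = dst.state_dict()
csd = {k: v for k, v in src.state_dict().items() if k in dsd and v.shape == dsd[k].shape}
dst.load_state_dict(csd, strict=False)  # non-strict: unmatched keys keep their initialization
print(f"Transferred {len(csd)}/{len(dsd)} items")  # Transferred 2/4 items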
@@ -211,33 +251,33 @@ class BaseModel(nn.Module):
            batch (dict): Batch to compute loss on
            preds (torch.Tensor | List[torch.Tensor]): Predictions.
        """
        if not hasattr(self, 'criterion'):
        if not hasattr(self, "criterion"):
            self.criterion = self.init_criterion()

        preds = self.forward(batch['img']) if preds is None else preds
        preds = self.forward(batch["img"]) if preds is None else preds
        return self.criterion(preds, batch)

    def init_criterion(self):
        """Initialize the loss criterion for the BaseModel."""
        raise NotImplementedError('compute_loss() needs to be implemented by task heads')
        raise NotImplementedError("compute_loss() needs to be implemented by task heads")


class DetectionModel(BaseModel):
    """YOLOv8 detection model."""

    def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True):  # model, input channels, number of classes
    def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True):  # model, input channels, number of classes
        """Initialize the YOLOv8 detection model with the given config and parameters."""
        super().__init__()
        self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override YAML value
            self.yaml["nc"] = nc  # override YAML value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
        self.names = {i: f'{i}' for i in range(self.yaml['nc'])}  # default names dict
        self.inplace = self.yaml.get('inplace', True)
        self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
        self.inplace = self.yaml.get("inplace", True)

        # Build strides
        m = self.model[-1]  # Detect()
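Usage sketch for the constructor above (assumes an installed ultralytics package with the stock yolov8n.yaml config):

from ultralytics.nn.tasks import DetectionModel

model = DetectionModel(cfg="yolov8n.yaml", ch=3, nc=80, verbose=False)
print(model.names[0], model.stride)  # default numeric class names and the built detection strides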
@@ -255,7 +295,7 @@ class DetectionModel(BaseModel):
        initialize_weights(self)
        if verbose:
            self.info()
            LOGGER.info('')
            LOGGER.info("")

    def _predict_augment(self, x):
        """Perform augmentations on input image x and return augmented inference and train outputs."""
@@ -285,9 +325,9 @@ class DetectionModel(BaseModel):
    def _clip_augmented(self, y):
        """Clip YOLO augmented inference tails."""
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4 ** x for x in range(nl))  # grid points
        g = sum(4**x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[-1] // g) * sum(4 ** x for x in range(e))  # indices
        i = (y[0].shape[-1] // g) * sum(4**x for x in range(e))  # indices
        y[0] = y[0][..., :-i]  # large
        i = (y[-1].shape[-1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][..., i:]  # small
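Worked numbers for the grid-point arithmetic above, with the usual nl=3 detection layers (P3-P5):

nl = 3
g = sum(4**x for x in range(nl))  # 1 + 4 + 16 = 21 relative grid points
e = 1  # exclude one layer from each end
print(g, sum(4**x for x in range(e)), sum(4 ** (nl - 1 - x) for x in range(e)))  # 21 1 16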
@@ -301,7 +341,7 @@ class DetectionModel(BaseModel):
class OBBModel(DetectionModel):
    """YOLOv8 Oriented Bounding Box (OBB) model."""

    def __init__(self, cfg='yolov8n-obb.yaml', ch=3, nc=None, verbose=True):
    def __init__(self, cfg="yolov8n-obb.yaml", ch=3, nc=None, verbose=True):
        """Initialize YOLOv8 OBB model with given config and parameters."""
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
@@ -313,7 +353,7 @@ class OBBModel(DetectionModel):
class SegmentationModel(DetectionModel):
    """YOLOv8 segmentation model."""

    def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
    def __init__(self, cfg="yolov8n-seg.yaml", ch=3, nc=None, verbose=True):
        """Initialize YOLOv8 segmentation model with given config and parameters."""
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
@@ -325,13 +365,13 @@ class SegmentationModel(DetectionModel):
class PoseModel(DetectionModel):
    """YOLOv8 pose model."""

    def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
    def __init__(self, cfg="yolov8n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
        """Initialize YOLOv8 Pose model."""
        if not isinstance(cfg, dict):
            cfg = yaml_model_load(cfg)  # load model YAML
        if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
        if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg["kpt_shape"]):
            LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
            cfg['kpt_shape'] = data_kpt_shape
            cfg["kpt_shape"] = data_kpt_shape
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

    def init_criterion(self):
@@ -342,7 +382,7 @@ class PoseModel(DetectionModel):
class ClassificationModel(BaseModel):
    """YOLOv8 classification model."""

    def __init__(self, cfg='yolov8n-cls.yaml', ch=3, nc=None, verbose=True):
    def __init__(self, cfg="yolov8n-cls.yaml", ch=3, nc=None, verbose=True):
        """Init ClassificationModel with YAML, channels, number of classes, verbose flag."""
        super().__init__()
        self._from_yaml(cfg, ch, nc, verbose)
@@ -352,21 +392,21 @@ class ClassificationModel(BaseModel):
        self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override YAML value
        elif not nc and not self.yaml.get('nc', None):
            raise ValueError('nc not specified. Must specify nc in model.yaml or function arguments.')
            self.yaml["nc"] = nc  # override YAML value
        elif not nc and not self.yaml.get("nc", None):
            raise ValueError("nc not specified. Must specify nc in model.yaml or function arguments.")
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
        self.stride = torch.Tensor([1])  # no stride constraints
        self.names = {i: f'{i}' for i in range(self.yaml['nc'])}  # default names dict
        self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
        self.info()

    @staticmethod
    def reshape_outputs(model, nc):
        """Update a TorchVision classification model to class count 'nc' if required."""
        name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1]  # last module
        name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1]  # last module
        if isinstance(m, Classify):  # YOLO Classify() head
            if m.linear.out_features != nc:
                m.linear = nn.Linear(m.linear.in_features, nc)
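The head-swap idea in reshape_outputs, sketched on a plain torchvision backbone (resnet18 is illustrative; assumes torchvision is installed):

import torch.nn as nn
import torchvision

model = torchvision.models.resnet18(weights=None)
name, m = list(model.named_children())[-1]  # last module: ('fc', Linear(in=512, out=1000))
if isinstance(m, nn.Linear) and m.out_features != 10:
    setattr(model, name, nn.Linear(m.in_features, 10))  # re-head for 10 classes
print(model.fc)  # Linear(in_features=512, out_features=10, bias=True)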
@@ -409,7 +449,7 @@ class RTDETRDetectionModel(DetectionModel):
        predict: Performs a forward pass through the network and returns the output.
    """

    def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
    def __init__(self, cfg="rtdetr-l.yaml", ch=3, nc=None, verbose=True):
        """
        Initialize the RTDETRDetectionModel.
@@ -438,39 +478,39 @@ class RTDETRDetectionModel(DetectionModel):
        Returns:
            (tuple): A tuple containing the total loss and main three losses in a tensor.
        """
        if not hasattr(self, 'criterion'):
        if not hasattr(self, "criterion"):
            self.criterion = self.init_criterion()

        img = batch['img']
        img = batch["img"]
        # NOTE: preprocess gt_bbox and gt_labels to list.
        bs = len(img)
        batch_idx = batch['batch_idx']
        batch_idx = batch["batch_idx"]
        gt_groups = [(batch_idx == i).sum().item() for i in range(bs)]
        targets = {
            'cls': batch['cls'].to(img.device, dtype=torch.long).view(-1),
            'bboxes': batch['bboxes'].to(device=img.device),
            'batch_idx': batch_idx.to(img.device, dtype=torch.long).view(-1),
            'gt_groups': gt_groups}
            "cls": batch["cls"].to(img.device, dtype=torch.long).view(-1),
            "bboxes": batch["bboxes"].to(device=img.device),
            "batch_idx": batch_idx.to(img.device, dtype=torch.long).view(-1),
            "gt_groups": gt_groups,
        }

        preds = self.predict(img, batch=targets) if preds is None else preds
        dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta = preds if self.training else preds[1]
        if dn_meta is None:
            dn_bboxes, dn_scores = None, None
        else:
            dn_bboxes, dec_bboxes = torch.split(dec_bboxes, dn_meta['dn_num_split'], dim=2)
            dn_scores, dec_scores = torch.split(dec_scores, dn_meta['dn_num_split'], dim=2)
            dn_bboxes, dec_bboxes = torch.split(dec_bboxes, dn_meta["dn_num_split"], dim=2)
            dn_scores, dec_scores = torch.split(dec_scores, dn_meta["dn_num_split"], dim=2)

        dec_bboxes = torch.cat([enc_bboxes.unsqueeze(0), dec_bboxes])  # (7, bs, 300, 4)
        dec_scores = torch.cat([enc_scores.unsqueeze(0), dec_scores])

        loss = self.criterion((dec_bboxes, dec_scores),
                              targets,
                              dn_bboxes=dn_bboxes,
                              dn_scores=dn_scores,
                              dn_meta=dn_meta)
        loss = self.criterion(
            (dec_bboxes, dec_scores), targets, dn_bboxes=dn_bboxes, dn_scores=dn_scores, dn_meta=dn_meta
        )
        # NOTE: There are like 12 losses in RTDETR, backward with all losses but only show the main three losses.
        return sum(loss.values()), torch.as_tensor([loss[k].detach() for k in ['loss_giou', 'loss_class', 'loss_bbox']],
                                                   device=img.device)
        return sum(loss.values()), torch.as_tensor(
            [loss[k].detach() for k in ["loss_giou", "loss_class", "loss_bbox"]], device=img.device
        )

    def predict(self, x, profile=False, visualize=False, batch=None, augment=False, embed=None):
        """
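A worked sketch of the gt_groups bookkeeping above: ground-truth boxes arrive flattened across the batch, and batch_idx records which image each box belongs to (values illustrative):

import torch

batch_idx = torch.tensor([0, 0, 1, 2, 2, 2])  # image index of each GT box
bs = 3
gt_groups = [(batch_idx == i).sum().item() for i in range(bs)]
print(gt_groups)  # [2, 1, 3] boxes per image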
@@ -553,6 +593,7 @@ def temporary_modules(modules=None):

    import importlib
    import sys

    try:
        # Set modules in sys.modules under their old name
        for old, new in modules.items():
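A minimal sketch of the sys.modules aliasing that temporary_modules relies on: an old import path is pointed at an existing module, so legacy references (including pickled checkpoints) still resolve. The module names here are illustrative.

import importlib
import sys

sys.modules["legacy_utils"] = importlib.import_module("json")  # alias old name -> new module
import legacy_utils  # found in sys.modules, so no real package is needed

print(legacy_utils.dumps({"ok": True}))  # it is really `json`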
@@ -580,30 +621,38 @@ def torch_safe_load(weight):
    """
    from ultralytics.utils.downloads import attempt_download_asset

    check_suffix(file=weight, suffix='.pt')
    check_suffix(file=weight, suffix=".pt")
    file = attempt_download_asset(weight)  # search online if missing locally
    try:
        with temporary_modules({
                'ultralytics.yolo.utils': 'ultralytics.utils',
                'ultralytics.yolo.v8': 'ultralytics.models.yolo',
                'ultralytics.yolo.data': 'ultralytics.data'}):  # for legacy 8.0 Classify and Pose models
            return torch.load(file, map_location='cpu'), file  # load
        with temporary_modules(
            {
                "ultralytics.yolo.utils": "ultralytics.utils",
                "ultralytics.yolo.v8": "ultralytics.models.yolo",
                "ultralytics.yolo.data": "ultralytics.data",
            }
        ):  # for legacy 8.0 Classify and Pose models
            return torch.load(file, map_location="cpu"), file  # load

    except ModuleNotFoundError as e:  # e.name is missing module name
        if e.name == 'models':
        if e.name == "models":
            raise TypeError(
                emojis(f'ERROR ❌️ {weight} appears to be an Ultralytics YOLOv5 model originally trained '
                       f'with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with '
                       f'YOLOv8 at https://github.com/ultralytics/ultralytics.'
                       f"\nRecommended fixes are to train a new model using the latest 'ultralytics' package or to "
                       f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'")) from e
        LOGGER.warning(f"WARNING ⚠️ {weight} appears to require '{e.name}', which is not in ultralytics requirements."
                       f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future."
                       f"\nRecommended fixes are to train a new model using the latest 'ultralytics' package or to "
                       f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'")
                emojis(
                    f"ERROR ❌️ {weight} appears to be an Ultralytics YOLOv5 model originally trained "
                    f"with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with "
                    f"YOLOv8 at https://github.com/ultralytics/ultralytics."
                    f"\nRecommended fixes are to train a new model using the latest 'ultralytics' package or to "
                    f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'"
                )
            ) from e
        LOGGER.warning(
            f"WARNING ⚠️ {weight} appears to require '{e.name}', which is not in ultralytics requirements."
            f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future."
            f"\nRecommended fixes are to train a new model using the latest 'ultralytics' package or to "
            f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'"
        )
        check_requirements(e.name)  # install missing module

    return torch.load(file, map_location='cpu'), file  # load
    return torch.load(file, map_location="cpu"), file  # load


def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
@@ -612,25 +661,25 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
    ensemble = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        ckpt, w = torch_safe_load(w)  # load ckpt
        args = {**DEFAULT_CFG_DICT, **ckpt['train_args']} if 'train_args' in ckpt else None  # combined args
        model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
        args = {**DEFAULT_CFG_DICT, **ckpt["train_args"]} if "train_args" in ckpt else None  # combined args
        model = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model

        # Model compatibility updates
        model.args = args  # attach args to model
        model.pt_path = w  # attach *.pt file path to model
        model.task = guess_model_task(model)
        if not hasattr(model, 'stride'):
            model.stride = torch.tensor([32.])
        if not hasattr(model, "stride"):
            model.stride = torch.tensor([32.0])

        # Append
        ensemble.append(model.fuse().eval() if fuse and hasattr(model, 'fuse') else model.eval())  # model in eval mode
        ensemble.append(model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval())  # model in eval mode

    # Module updates
    for m in ensemble.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
            m.inplace = inplace
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
        elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
@@ -638,35 +687,35 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
        return ensemble[-1]

    # Return ensemble
    LOGGER.info(f'Ensemble created with {weights}\n')
    for k in 'names', 'nc', 'yaml':
    LOGGER.info(f"Ensemble created with {weights}\n")
    for k in "names", "nc", "yaml":
        setattr(ensemble, k, getattr(ensemble[0], k))
    ensemble.stride = ensemble[torch.argmax(torch.tensor([m.stride.max() for m in ensemble])).int()].stride
    assert all(ensemble[0].nc == m.nc for m in ensemble), f'Models differ in class counts {[m.nc for m in ensemble]}'
    assert all(ensemble[0].nc == m.nc for m in ensemble), f"Models differ in class counts {[m.nc for m in ensemble]}"
    return ensemble


def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
    """Loads a single model's weights."""
    ckpt, weight = torch_safe_load(weight)  # load ckpt
    args = {**DEFAULT_CFG_DICT, **(ckpt.get('train_args', {}))}  # combine model and default args, preferring model args
    model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
    args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))}  # combine model and default args, preferring model args
    model = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model

    # Model compatibility updates
    model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # attach args to model
    model.pt_path = weight  # attach *.pt file path to model
    model.task = guess_model_task(model)
    if not hasattr(model, 'stride'):
        model.stride = torch.tensor([32.])
    if not hasattr(model, "stride"):
        model.stride = torch.tensor([32.0])

    model = model.fuse().eval() if fuse and hasattr(model, 'fuse') else model.eval()  # model in eval mode
    model = model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval()  # model in eval mode

    # Module updates
    for m in model.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
            m.inplace = inplace
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
        elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model and ckpt
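Usage sketch for the single-weight loader above (assumes a local Ultralytics-format yolov8n.pt checkpoint):

from ultralytics.nn.tasks import attempt_load_one_weight

model, ckpt = attempt_load_one_weight("yolov8n.pt", device="cpu", fuse=True)
print(model.task, model.stride)  # e.g. 'detect' and the model strides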
@@ -678,11 +727,11 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
    import ast

    # Args
    max_channels = float('inf')
    nc, act, scales = (d.get(x) for x in ('nc', 'activation', 'scales'))
    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
    max_channels = float("inf")
    nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales"))
    depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape"))
    if scales:
        scale = d.get('scale')
        scale = d.get("scale")
        if not scale:
            scale = tuple(scales.keys())[0]
            LOGGER.warning(f"WARNING ⚠️ no model scale passed. Assuming scale='{scale}'.")
@@ -697,16 +746,37 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
    LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}")
    ch = [ch]
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = getattr(torch.nn, m[3:]) if 'nn.' in m else globals()[m]  # get module
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = getattr(torch.nn, m[3:]) if "nn." in m else globals()[m]  # get module
        for j, a in enumerate(args):
            if isinstance(a, str):
                with contextlib.suppress(ValueError):
                    args[j] = locals()[a] if a in locals() else ast.literal_eval(a)

        n = n_ = max(round(n * depth), 1) if n > 1 else n  # depth gain
        if m in (Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
                 BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3):
        if m in (
            Classify,
            Conv,
            ConvTranspose,
            GhostConv,
            Bottleneck,
            GhostBottleneck,
            SPP,
            SPPF,
            DWConv,
            Focus,
            BottleneckCSP,
            C1,
            C2,
            C2f,
            C3,
            C3TR,
            C3Ghost,
            nn.ConvTranspose2d,
            DWConvTranspose2d,
            C3x,
            RepC3,
        ):
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
                c2 = make_divisible(min(c2, max_channels) * width, 8)
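Worked numbers for the depth/width scaling above, using yolov8n-style gains (depth_multiple=0.33, width_multiple=0.25, max_channels=1024); make_divisible is re-implemented inline under the assumption that it matches the ultralytics helper:

import math

def make_divisible(x, divisor=8):
    """Round up to the nearest multiple of divisor (assumed ultralytics behavior)."""
    return math.ceil(x / divisor) * divisor

depth, width, max_channels = 0.33, 0.25, 1024
n = 6                                        # repeats requested by the YAML
print(max(round(n * depth), 1))              # depth gain: 6 -> 2 repeats
print(make_divisible(min(512, max_channels) * width, 8))  # width gain: 512 -> 128 channels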
@@ -739,11 +809,11 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        m.np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
        if verbose:
            LOGGER.info(f'{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}')  # print
            LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}")  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
@@ -757,16 +827,16 @@ def yaml_model_load(path):
    import re

    path = Path(path)
    if path.stem in (f'yolov{d}{x}6' for x in 'nsmlx' for d in (5, 8)):
        new_stem = re.sub(r'(\d+)([nslmx])6(.+)?$', r'\1\2-p6\3', path.stem)
        LOGGER.warning(f'WARNING ⚠️ Ultralytics YOLO P6 models now use -p6 suffix. Renaming {path.stem} to {new_stem}.')
    if path.stem in (f"yolov{d}{x}6" for x in "nsmlx" for d in (5, 8)):
        new_stem = re.sub(r"(\d+)([nslmx])6(.+)?$", r"\1\2-p6\3", path.stem)
        LOGGER.warning(f"WARNING ⚠️ Ultralytics YOLO P6 models now use -p6 suffix. Renaming {path.stem} to {new_stem}.")
        path = path.with_name(new_stem + path.suffix)

    unified_path = re.sub(r'(\d+)([nslmx])(.+)?$', r'\1\3', str(path))  # i.e. yolov8x.yaml -> yolov8.yaml
    unified_path = re.sub(r"(\d+)([nslmx])(.+)?$", r"\1\3", str(path))  # i.e. yolov8x.yaml -> yolov8.yaml
    yaml_file = check_yaml(unified_path, hard=False) or check_yaml(path)
    d = yaml_load(yaml_file)  # model dict
    d['scale'] = guess_model_scale(path)
    d['yaml_file'] = str(path)
    d["scale"] = guess_model_scale(path)
    d["yaml_file"] = str(path)
    return d
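The two substitutions above in action, using only the standard re module (the stems are illustrative):

import re

print(re.sub(r"(\d+)([nslmx])(.+)?$", r"\1\3", "yolov8x.yaml"))    # yolov8.yaml
print(re.sub(r"(\d+)([nslmx])6(.+)?$", r"\1\2-p6\3", "yolov5n6"))  # yolov5n-p6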
@@ -784,8 +854,9 @@ def guess_model_scale(model_path):
    """
    with contextlib.suppress(AttributeError):
        import re
        return re.search(r'yolov\d+([nslmx])', Path(model_path).stem).group(1)  # n, s, m, l, or x
    return ''

        return re.search(r"yolov\d+([nslmx])", Path(model_path).stem).group(1)  # n, s, m, l, or x
    return ""


def guess_model_task(model):
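Usage sketch: the scale letter is read straight from the filename stem, and an empty string is returned when no scale is present (the AttributeError from the failed match is suppressed).

from ultralytics.nn.tasks import guess_model_scale

print(guess_model_scale("yolov8x.yaml"))  # 'x'
print(guess_model_scale("custom.yaml"))   # '' (no scale in the name)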
@@ -804,17 +875,17 @@ def guess_model_task(model):

    def cfg2task(cfg):
        """Guess from YAML dictionary."""
        m = cfg['head'][-1][-2].lower()  # output module name
        if m in ('classify', 'classifier', 'cls', 'fc'):
            return 'classify'
        if m == 'detect':
            return 'detect'
        if m == 'segment':
            return 'segment'
        if m == 'pose':
            return 'pose'
        if m == 'obb':
            return 'obb'
        m = cfg["head"][-1][-2].lower()  # output module name
        if m in ("classify", "classifier", "cls", "fc"):
            return "classify"
        if m == "detect":
            return "detect"
        if m == "segment":
            return "segment"
        if m == "pose":
            return "pose"
        if m == "obb":
            return "obb"

    # Guess from model cfg
    if isinstance(model, dict):
@@ -823,40 +894,42 @@ def guess_model_task(model):

    # Guess from PyTorch model
    if isinstance(model, nn.Module):  # PyTorch model
        for x in 'model.args', 'model.model.args', 'model.model.model.args':
        for x in "model.args", "model.model.args", "model.model.model.args":
            with contextlib.suppress(Exception):
                return eval(x)['task']
        for x in 'model.yaml', 'model.model.yaml', 'model.model.model.yaml':
                return eval(x)["task"]
        for x in "model.yaml", "model.model.yaml", "model.model.model.yaml":
            with contextlib.suppress(Exception):
                return cfg2task(eval(x))

        for m in model.modules():
            if isinstance(m, Detect):
                return 'detect'
                return "detect"
            elif isinstance(m, Segment):
                return 'segment'
                return "segment"
            elif isinstance(m, Classify):
                return 'classify'
                return "classify"
            elif isinstance(m, Pose):
                return 'pose'
                return "pose"
            elif isinstance(m, OBB):
                return 'obb'
                return "obb"

    # Guess from model filename
    if isinstance(model, (str, Path)):
        model = Path(model)
        if '-seg' in model.stem or 'segment' in model.parts:
            return 'segment'
        elif '-cls' in model.stem or 'classify' in model.parts:
            return 'classify'
        elif '-pose' in model.stem or 'pose' in model.parts:
            return 'pose'
        elif '-obb' in model.stem or 'obb' in model.parts:
            return 'obb'
        elif 'detect' in model.parts:
            return 'detect'
        if "-seg" in model.stem or "segment" in model.parts:
            return "segment"
        elif "-cls" in model.stem or "classify" in model.parts:
            return "classify"
        elif "-pose" in model.stem or "pose" in model.parts:
            return "pose"
        elif "-obb" in model.stem or "obb" in model.parts:
            return "obb"
        elif "detect" in model.parts:
            return "detect"

    # Unable to determine task from model
    LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
                   "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', 'pose' or 'obb'.")
    return 'detect'  # assume detect
    LOGGER.warning(
        "WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
        "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', 'pose' or 'obb'."
    )
    return "detect"  # assume detect
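Usage sketch for the fallback chain above: the task comes from filename markers when a path is passed, and detection is assumed (with a warning) when nothing matches.

from ultralytics.nn.tasks import guess_model_task

print(guess_model_task("yolov8n-seg.pt"))  # 'segment' (from the '-seg' stem)
print(guess_model_task("unknown.pt"))      # 'detect' (assumed, logs a warning)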