ultralytics 8.3.37 TensorRT auto-workspace size (#17748)

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Burhan 2024-11-25 05:33:11 -05:00 committed by GitHub
parent 6a762564c8
commit ee6fde0beb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 24 additions and 24 deletions

View file

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.3.36"
__version__ = "8.3.37"
import os

View file

@ -83,7 +83,7 @@ int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: True # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
workspace: 4 # (int) TensorRT: workspace size (GB)
workspace: None # (float, optional) TensorRT: workspace size (GiB); `None` lets TensorRT auto-allocate memory
nms: False # (bool) CoreML: add NMS
# Hyperparameters ------------------------------------------------------------------------------------------------------

View file

@ -781,10 +781,10 @@ class Exporter:
# Engine builder
builder = trt.Builder(logger)
config = builder.create_builder_config()
workspace = int(self.args.workspace * (1 << 30))
if is_trt10:
workspace = int(self.args.workspace * (1 << 30)) if self.args.workspace is not None else 0
if is_trt10 and workspace > 0:
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace)
else: # TensorRT versions 7, 8
elif workspace > 0 and not is_trt10: # TensorRT versions 7, 8
config.max_workspace_size = workspace
flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flag)
@ -823,7 +823,7 @@ class Exporter:
LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
profile = builder.create_optimization_profile()
min_shape = (1, shape[1], 32, 32) # minimum input shape
max_shape = (*shape[:2], *(int(max(1, self.args.workspace) * d) for d in shape[2:])) # max input shape
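max_shape = (*shape[:2], *(int(max(1, workspace) * d) for d in shape[2:])) # max input shape; NOTE(review): `workspace` here is the byte count computed above (GiB * 2**30, or 0 when unset), not the GiB value the previous code used as the scale factor — confirm this is intended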
for inp in inputs:
profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape)
config.add_optimization_profile(profile)