Faster ONNX inference with bindings (#17184)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent c4dae56e1a
commit ca5e9daed1
1 changed file with 39 additions and 3 deletions
@@ -189,10 +189,32 @@ class AutoBackend(nn.Module):
             check_requirements("numpy==1.23.5")
             import onnxruntime

-            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
+            providers = onnxruntime.get_available_providers()
+            if not cuda and "CUDAExecutionProvider" in providers:
+                providers.remove("CUDAExecutionProvider")
+            elif cuda and "CUDAExecutionProvider" not in providers:
+                LOGGER.warning("WARNING ⚠️ Failed to start ONNX Runtime session with CUDA. Falling back to CPU...")
+                device = torch.device("cpu")
+                cuda = False
+            LOGGER.info(f"Preferring ONNX Runtime {providers[0]}")
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             metadata = session.get_modelmeta().custom_metadata_map
+            dynamic = isinstance(session.get_outputs()[0].shape[0], str)
+            if not dynamic:
+                io = session.io_binding()
+                bindings = []
+                for output in session.get_outputs():
+                    y_tensor = torch.empty(output.shape, dtype=torch.float16 if fp16 else torch.float32).to(device)
+                    io.bind_output(
+                        name=output.name,
+                        device_type=device.type,
+                        device_id=device.index if cuda else 0,
+                        element_type=np.float16 if fp16 else np.float32,
+                        shape=tuple(y_tensor.shape),
+                        buffer_ptr=y_tensor.data_ptr(),
+                    )
+                    bindings.append(y_tensor)

         # OpenVINO
         elif xml:
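The gist of the setup change: instead of hard-coding the provider list, the backend now asks ONNX Runtime which execution providers are actually available, downgrades to CPU when CUDA was requested but is missing, and, whenever every output shape is static, pre-allocates torch output tensors and binds their memory to the session up front. As a self-contained illustration of the same pattern (a sketch, not the PR code itself), the model path "model.onnx" and the fp16 flag below are placeholder assumptions, and the model is assumed to have static output shapes:

import numpy as np
import onnxruntime
import torch

w, fp16 = "model.onnx", False  # hypothetical exported model path and half-precision flag
cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda else torch.device("cpu")

# Ask the runtime what it can actually run, rather than assuming CUDA is present.
providers = onnxruntime.get_available_providers()
if not cuda and "CUDAExecutionProvider" in providers:
    providers.remove("CUDAExecutionProvider")
elif cuda and "CUDAExecutionProvider" not in providers:
    device, cuda = torch.device("cpu"), False  # requested CUDA but onnxruntime-gpu lacks it
session = onnxruntime.InferenceSession(w, providers=providers)

# A dynamic axis shows up as a string (e.g. "batch") in the output shape; binding
# pre-allocated buffers only works when every dimension is a concrete int.
dynamic = isinstance(session.get_outputs()[0].shape[0], str)
if not dynamic:
    io = session.io_binding()
    bindings = []
    for output in session.get_outputs():
        y = torch.empty(output.shape, dtype=torch.float16 if fp16 else torch.float32, device=device)
        io.bind_output(
            name=output.name,
            device_type=device.type,
            device_id=device.index if cuda else 0,
            element_type=np.float16 if fp16 else np.float32,
            shape=tuple(y.shape),
            buffer_ptr=y.data_ptr(),  # ONNX Runtime writes results straight into this tensor
        )
        bindings.append(y)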
@@ -477,8 +499,22 @@ class AutoBackend(nn.Module):

         # ONNX Runtime
         elif self.onnx:
-            im = im.cpu().numpy()  # torch to numpy
-            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+            if self.dynamic:
+                im = im.cpu().numpy()  # torch to numpy
+                y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+            else:
+                if not self.cuda:
+                    im = im.cpu()
+                self.io.bind_input(
+                    name="images",
+                    device_type=im.device.type,
+                    device_id=im.device.index if im.device.type == "cuda" else 0,
+                    element_type=np.float16 if self.fp16 else np.float32,
+                    shape=tuple(im.shape),
+                    buffer_ptr=im.data_ptr(),
+                )
+                self.session.run_with_iobinding(self.io)
+                y = self.bindings

         # OpenVINO
         elif self.xml:
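At call time the two paths diverge: dynamic-shape models keep the old numpy round-trip through session.run(), while static-shape models bind the input tensor's raw pointer and reuse the output buffers bound at setup, so with CUDAExecutionProvider the data never has to leave the GPU. Continuing the hypothetical sketch above (the 1x3x640x640 input shape is an assumption; the PR binds the literal Ultralytics input name "images", where this sketch queries it from the session):

# One inference with IO binding, continuing the sketch above (static shapes assumed).
im = torch.zeros(1, 3, 640, 640, dtype=torch.float16 if fp16 else torch.float32, device=device)

io.bind_input(
    name=session.get_inputs()[0].name,  # "images" in Ultralytics ONNX exports
    device_type=im.device.type,
    device_id=im.device.index if im.device.type == "cuda" else 0,
    element_type=np.float16 if fp16 else np.float32,
    shape=tuple(im.shape),
    buffer_ptr=im.data_ptr(),
)
session.run_with_iobinding(io)  # outputs land in the tensors passed to bind_output()
y = bindings

Binding both sides means ONNX Runtime reads the input and writes every output in place through raw device pointers, which removes the per-call torch-to-numpy copy the old code paid on every frame.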