ultralytics 8.3.72 Fix NVIDIA Jetson DLA core support for DLA inference (#19078)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Lakshantha Dissanayake <lakshanthad@yahoo.com> Co-authored-by: Lakshantha Dissanayake <lakshantha@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
84a8b067c4
commit
c1860b8333
5 changed files with 15 additions and 12 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
|
@ -99,7 +99,7 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
# Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921
|
# Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921
|
||||||
os: [ubuntu-latest, macos-15, ubuntu-24.04-arm]
|
os: [ubuntu-latest, macos-15]
|
||||||
python-version: ["3.11"]
|
python-version: ["3.11"]
|
||||||
model: [yolo11n]
|
model: [yolo11n]
|
||||||
steps:
|
steps:
|
||||||
|
|
|
||||||
|
|
@ -289,10 +289,13 @@ The YOLO11n model in PyTorch format is converted to TensorRT to run inference wi
|
||||||
|
|
||||||
The following Jetson devices are equipped with DLA hardware:
|
The following Jetson devices are equipped with DLA hardware:
|
||||||
|
|
||||||
- Jetson Orin NX 16GB
|
| Jetson Device | DLA Cores | DLA Max Frequency |
|
||||||
- Jetson AGX Orin Series
|
| ------------------------ | --------- | ----------------- |
|
||||||
- Jetson AGX Xavier Series
|
| Jetson AGX Orin Series | 2 | 1.6 GHz |
|
||||||
- Jetson Xavier NX Series
|
| Jetson Orin NX 16GB | 2 | 614 MHz |
|
||||||
|
| Jetson Orin NX 8GB | 1 | 614 MHz |
|
||||||
|
| Jetson AGX Xavier Series | 2 | 1.4 GHz |
|
||||||
|
| Jetson Xavier NX Series | 2 | 1.1 GHz |
|
||||||
|
|
||||||
!!! example
|
!!! example
|
||||||
|
|
||||||
|
|
@ -318,6 +321,7 @@ The following Jetson devices are equipped with DLA hardware:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8)
|
# Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8)
|
||||||
|
# Once a DLA core number is specified at export, the same core is used at inference
|
||||||
yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores
|
yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores
|
||||||
|
|
||||||
# Run inference with the exported model on the DLA
|
# Run inference with the exported model on the DLA
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
||||||
|
|
||||||
__version__ = "8.3.71"
|
__version__ = "8.3.72"
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -386,6 +386,8 @@ class Exporter:
|
||||||
"names": model.names,
|
"names": model.names,
|
||||||
"args": {k: v for k, v in self.args if k in fmt_keys},
|
"args": {k: v for k, v in self.args if k in fmt_keys},
|
||||||
} # model metadata
|
} # model metadata
|
||||||
|
if dla is not None:
|
||||||
|
self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
|
||||||
if model.task == "pose":
|
if model.task == "pose":
|
||||||
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
|
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -292,13 +292,10 @@ class AutoBackend(nn.Module):
|
||||||
metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata
|
metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
f.seek(0) # engine file may lack embedded Ultralytics metadata
|
f.seek(0) # engine file may lack embedded Ultralytics metadata
|
||||||
|
dla = metadata.get("dla", None)
|
||||||
|
if dla is not None:
|
||||||
|
runtime.DLA_core = int(dla)
|
||||||
model = runtime.deserialize_cuda_engine(f.read()) # read engine
|
model = runtime.deserialize_cuda_engine(f.read()) # read engine
|
||||||
if "dla" in str(device.type):
|
|
||||||
dla_core = int(device.type.split(":")[1])
|
|
||||||
assert dla_core in {0, 1}, (
|
|
||||||
"Expected device type for inference in DLA is 'dla:0' or 'dla:1', but received '{device.type}'"
|
|
||||||
)
|
|
||||||
runtime.DLA_core = dla_core
|
|
||||||
|
|
||||||
# Model context
|
# Model context
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue