# Patch summary (unified git diff). In this copy the original line breaks and indentation
# appear to have been collapsed, so hunk lines run together on a few long lines.
#
# Files touched:
#   .github/workflows/ci.yml        - removes `ubuntu-24.04-arm` from the `os` matrix, leaving
#                                     `ubuntu-latest` and `macos-15`.
#   docs/en/guides/nvidia-jetson.md - replaces the bullet list of DLA-equipped Jetson devices with a
#                                     table (device, DLA cores, DLA max frequency), adds a row for
#                                     Jetson Orin NX 8GB, and adds a comment that the DLA core chosen
#                                     at export is the one used at inference.
#   ultralytics/__init__.py         - bumps `__version__` from "8.3.71" to "8.3.72".
#   ultralytics/engine/exporter.py  - writes `dla` into `self.metadata["dla"]` when `dla is not None`.
#   ultralytics/nn/autobackend.py   - reads `dla` from the engine metadata and sets `runtime.DLA_core`
#                                     before `runtime.deserialize_cuda_engine(...)`; removes the old
#                                     code that parsed the core index from `device.type` (and asserted
#                                     it was 0 or 1) after deserialization.
#
# NOTE(review): in the autobackend hunk the added `metadata.get("dla", None)` appears to sit after
# the `except UnicodeDecodeError` branch (indentation is lost here, so this is inferred from order).
# On that path this `try` never assigns `metadata`; confirm it is a dict at that point, otherwise
# the lookup should move inside the `try` - TODO confirm against the full file.
# NOTE(review): the ci.yml comment only explains the windows-latest removal; the reason for dropping
# `ubuntu-24.04-arm` is not stated in this patch - presumably intentional, verify.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f054257f..b351e450 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,7 +99,7 @@ jobs: fail-fast: false matrix: # Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921 - os: [ubuntu-latest, macos-15, ubuntu-24.04-arm] + os: [ubuntu-latest, macos-15] python-version: ["3.11"] model: [yolo11n] steps: diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md index 38301b3c..10ec44ed 100644 --- a/docs/en/guides/nvidia-jetson.md +++ b/docs/en/guides/nvidia-jetson.md @@ -289,10 +289,13 @@ The YOLO11n model in PyTorch format is converted to TensorRT to run inference wi The following Jetson devices are equipped with DLA hardware: -- Jetson Orin NX 16GB -- Jetson AGX Orin Series -- Jetson AGX Xavier Series -- Jetson Xavier NX Series +| Jetson Device | DLA Cores | DLA Max Frequency | +| ------------------------ | --------- | ----------------- | +| Jetson AGX Orin Series | 2 | 1.6 GHz | +| Jetson Orin NX 16GB | 2 | 614 MHz | +| Jetson Orin NX 8GB | 1 | 614 MHz | +| Jetson AGX Xavier Series | 2 | 1.4 GHz | +| Jetson Xavier NX Series | 2 | 1.1 GHz | !!! 
example @@ -318,6 +321,7 @@ The following Jetson devices are equipped with DLA hardware: ```bash # Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8) + # Once DLA core number is specified at export, it will use the same core at inference yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores # Run inference with the exported model on the DLA diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 01631ca8..c8862c3d 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license -__version__ = "8.3.71" +__version__ = "8.3.72" import os diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 2cf3d26c..65dba0b9 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -386,6 +386,8 @@ class Exporter: "names": model.names, "args": {k: v for k, v in self.args if k in fmt_keys}, } # model metadata + if dla is not None: + self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one if model.task == "pose": self.metadata["kpt_shape"] = model.model[-1].kpt_shape diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index ae4ed065..e563e062 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -292,13 +292,10 @@ class AutoBackend(nn.Module): metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata except UnicodeDecodeError: f.seek(0) # engine file may lack embedded Ultralytics metadata + dla = metadata.get("dla", None) + if dla is not None: + runtime.DLA_core = int(dla) model = runtime.deserialize_cuda_engine(f.read()) # read engine - if "dla" in str(device.type): - dla_core = int(device.type.split(":")[1]) - assert dla_core in {0, 1}, ( - "Expected device type for inference in DLA is 'dla:0' or 'dla:1', but received 
'{device.type}'" - ) - runtime.DLA_core = dla_core # Model context try: