diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f054257f..b351e450 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -99,7 +99,7 @@ jobs:
       fail-fast: false
       matrix:
         # Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921
-        os: [ubuntu-latest, macos-15, ubuntu-24.04-arm]
+        os: [ubuntu-latest, macos-15]
         python-version: ["3.11"]
         model: [yolo11n]
     steps:
diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md
index 38301b3c..10ec44ed 100644
--- a/docs/en/guides/nvidia-jetson.md
+++ b/docs/en/guides/nvidia-jetson.md
@@ -289,10 +289,13 @@ The YOLO11n model in PyTorch format is converted to TensorRT to run inference wi
 
 The following Jetson devices are equipped with DLA hardware:
 
-- Jetson Orin NX 16GB
-- Jetson AGX Orin Series
-- Jetson AGX Xavier Series
-- Jetson Xavier NX Series
+| Jetson Device            | DLA Cores | DLA Max Frequency |
+| ------------------------ | --------- | ----------------- |
+| Jetson AGX Orin Series   | 2         | 1.6 GHz           |
+| Jetson Orin NX 16GB      | 2         | 614 MHz           |
+| Jetson Orin NX 8GB       | 1         | 614 MHz           |
+| Jetson AGX Xavier Series | 2         | 1.4 GHz           |
+| Jetson Xavier NX Series  | 2         | 1.1 GHz           |
 
 !!! example
 
@@ -318,6 +321,7 @@ The following Jetson devices are equipped with DLA hardware:
 
         ```bash
         # Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8)
+        # The DLA core chosen at export is saved in the model metadata, so inference automatically runs on that same core
         yolo export model=yolo11n.pt format=engine device="dla:0" half=True  # dla:0 or dla:1 corresponds to the DLA cores
 
         # Run inference with the exported model on the DLA
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index 01631ca8..c8862c3d 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-__version__ = "8.3.71"
+__version__ = "8.3.72"
 
 import os
 
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 2cf3d26c..65dba0b9 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -386,6 +386,8 @@ class Exporter:
             "names": model.names,
             "args": {k: v for k, v in self.args if k in fmt_keys},
         }  # model metadata
+        if dla is not None:
+            self.metadata["dla"] = dla  # make sure `AutoBackend` uses correct dla device if it has one
         if model.task == "pose":
             self.metadata["kpt_shape"] = model.model[-1].kpt_shape
 
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index ae4ed065..e563e062 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -292,13 +292,10 @@ class AutoBackend(nn.Module):
                     metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
+                    dla = metadata.get("dla", None)  # read only when embedded metadata was parsed
+                    if dla is not None:
+                        runtime.DLA_core = int(dla)  # select core before the engine is deserialized
                 except UnicodeDecodeError:
                     f.seek(0)  # engine file may lack embedded Ultralytics metadata
                 model = runtime.deserialize_cuda_engine(f.read())  # read engine
-                if "dla" in str(device.type):
-                    dla_core = int(device.type.split(":")[1])
-                    assert dla_core in {0, 1}, (
-                        "Expected device type for inference in DLA is 'dla:0' or 'dla:1', but received '{device.type}'"
-                    )
-                    runtime.DLA_core = dla_core
 
             # Model context
             try: