Update Triton Inference Server guide (#17059)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
235f2d95af
commit
c4dae56e1a
1 changed file with 24 additions and 2 deletions
|
|
@ -80,6 +80,28 @@ The Triton Model Repository is a storage location where Triton can access and lo
|
|||
|
||||
# Create config file
|
||||
(triton_model_path / "config.pbtxt").touch()
|
||||
|
||||
# (Optional) Enable TensorRT for GPU inference
|
||||
# First run will be slow due to TensorRT engine conversion
|
||||
import json
|
||||
|
||||
data = {
|
||||
"optimization": {
|
||||
"execution_accelerators": {
|
||||
"gpu_execution_accelerator": [
|
||||
{
|
||||
"name": "tensorrt",
|
||||
"parameters": {"key": "precision_mode", "value": "FP16"},
|
||||
"parameters": {"key": "max_workspace_size_bytes", "value": "3221225472"},
|
||||
"parameters": {"key": "trt_engine_cache_enable", "value": "1"},
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
with open(triton_model_path / "config.pbtxt", "w") as f:
|
||||
json.dump(data, f, indent=4)
|
||||
```
|
||||
|
||||
## Running Triton Inference Server
|
||||
|
|
@ -94,7 +116,7 @@ import time
|
|||
from tritonclient.http import InferenceServerClient
|
||||
|
||||
# Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
|
||||
tag = "nvcr.io/nvidia/tritonserver:23.09-py3" # 6.4 GB
|
||||
tag = "nvcr.io/nvidia/tritonserver:24.09-py3" # 8.57 GB
|
||||
|
||||
# Pull the image
|
||||
subprocess.call(f"docker pull {tag}", shell=True)
|
||||
|
|
@ -187,7 +209,7 @@ Setting up [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/) wit
|
|||
from tritonclient.http import InferenceServerClient
|
||||
|
||||
# Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
|
||||
tag = "nvcr.io/nvidia/tritonserver:23.09-py3"
|
||||
tag = "nvcr.io/nvidia/tritonserver:24.09-py3"
|
||||
|
||||
subprocess.call(f"docker pull {tag}", shell=True)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue