Update triton-inference-server.md (#17252)

Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Authored by Mohammed Yasin on 2024-10-30 19:38:28 +08:00, committed by GitHub
parent e798dbf52e
commit b8c90baffe


````diff
@@ -83,25 +83,34 @@ The Triton Model Repository is a storage location where Triton can access and lo
 # (Optional) Enable TensorRT for GPU inference
 # First run will be slow due to TensorRT engine conversion
-import json
-
-data = {
-    "optimization": {
-        "execution_accelerators": {
-            "gpu_execution_accelerator": [
-                {
-                    "name": "tensorrt",
-                    "parameters": {"key": "precision_mode", "value": "FP16"},
-                    "parameters": {"key": "max_workspace_size_bytes", "value": "3221225472"},
-                    "parameters": {"key": "trt_engine_cache_enable", "value": "1"},
-                }
-            ]
-        }
-    }
-}
+data = """
+optimization {
+  execution_accelerators {
+    gpu_execution_accelerator {
+      name: "tensorrt"
+      parameters {
+        key: "precision_mode"
+        value: "FP16"
+      }
+      parameters {
+        key: "max_workspace_size_bytes"
+        value: "3221225472"
+      }
+      parameters {
+        key: "trt_engine_cache_enable"
+        value: "1"
+      }
+      parameters {
+        key: "trt_engine_cache_path"
+        value: "/models/yolo/1"
+      }
+    }
+  }
+}
+"""
 
 with open(triton_model_path / "config.pbtxt", "w") as f:
-    json.dump(data, f, indent=4)
+    f.write(data)
 ```

 ## Running Triton Inference Server
````
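The new `trt_engine_cache_path` of `/models/yolo/1` points inside the model repository that the `docker run` command below mounts from the host, so the TensorRT engine built during the slow first request is cached next to the exported model and reused on later runs. A minimal sketch of checking this from the host, assuming the repository lives under a temporary directory and the model directory is named `yolo`:

```python
from pathlib import Path

# Assumed host-side locations; the docs build these paths before exporting the model.
triton_repo_path = Path("tmp") / "triton_repo"  # mounted into the container at /models
triton_model_path = triton_repo_path / "yolo"

# /models/yolo/1 in the container maps to triton_model_path / "1" on the host.
# After the first (slow) inference request, TensorRT cache files should appear
# here alongside the exported model file.
for item in sorted((triton_model_path / "1").iterdir()):
    print(item.name)
```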
````diff
@@ -124,7 +133,7 @@ subprocess.call(f"docker pull {tag}", shell=True)
 # Run the Triton server and capture the container ID
 container_id = (
     subprocess.check_output(
-        f"docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
+        f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
         shell=True,
     )
     .decode("utf-8")
````
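With `--gpus 0` added, the container needs the NVIDIA container runtime, and the first request still pays the TensorRT conversion cost, so it helps to wait until the model reports ready before sending traffic. A rough readiness poll, assuming the HTTP endpoint on `localhost:8000` and a model named `yolo`:

```python
import time

from tritonclient.http import InferenceServerClient  # pip install tritonclient[http]

# Assumed endpoint and model name for illustration
client = InferenceServerClient(url="localhost:8000", verbose=False, ssl=False)

# Poll until the server and model are ready (or give up after ~30 s)
for _ in range(30):
    try:
        if client.is_server_live() and client.is_model_ready("yolo"):
            break
    except Exception:
        pass
    time.sleep(1)
```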
````diff
@@ -215,7 +224,7 @@ Setting up [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolov8/) wit
 container_id = (
     subprocess.check_output(
-        f"docker run -d --rm -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
+        f"docker run -d --rm --gpus 0 -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
         shell=True,
     )
     .decode("utf-8")
````
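Once the server is up, inference can go through the regular Ultralytics API by pointing `YOLO` at the Triton endpoint, as the surrounding docs describe; a short sketch, with the URL, model name, and image path as placeholders:

```python
from ultralytics import YOLO

# Load the Triton-served model; "yolo" matches the model directory name in the repository
model = YOLO("http://localhost:8000/yolo", task="detect")

# Run inference on a sample image (placeholder path)
results = model("path/to/image.jpg")
```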