Update triton-inference-server.md (#17252)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
e798dbf52e
commit
b8c90baffe
1 changed files with 26 additions and 17 deletions
|
|
@ -83,25 +83,34 @@ The Triton Model Repository is a storage location where Triton can access and lo
|
||||||
|
|
||||||
# (Optional) Enable TensorRT for GPU inference
|
# (Optional) Enable TensorRT for GPU inference
|
||||||
# First run will be slow due to TensorRT engine conversion
|
# First run will be slow due to TensorRT engine conversion
|
||||||
import json
|
data = """
|
||||||
|
optimization {
|
||||||
data = {
|
execution_accelerators {
|
||||||
"optimization": {
|
gpu_execution_accelerator {
|
||||||
"execution_accelerators": {
|
name: "tensorrt"
|
||||||
"gpu_execution_accelerator": [
|
parameters {
|
||||||
{
|
key: "precision_mode"
|
||||||
"name": "tensorrt",
|
value: "FP16"
|
||||||
"parameters": {"key": "precision_mode", "value": "FP16"},
|
}
|
||||||
"parameters": {"key": "max_workspace_size_bytes", "value": "3221225472"},
|
parameters {
|
||||||
"parameters": {"key": "trt_engine_cache_enable", "value": "1"},
|
key: "max_workspace_size_bytes"
|
||||||
}
|
value: "3221225472"
|
||||||
]
|
}
|
||||||
}
|
parameters {
|
||||||
|
key: "trt_engine_cache_enable"
|
||||||
|
value: "1"
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
key: "trt_engine_cache_path"
|
||||||
|
value: "/models/yolo/1"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
with open(triton_model_path / "config.pbtxt", "w") as f:
|
with open(triton_model_path / "config.pbtxt", "w") as f:
|
||||||
json.dump(data, f, indent=4)
|
f.write(data)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running Triton Inference Server
|
## Running Triton Inference Server
|
||||||
|
|
@ -124,7 +133,7 @@ subprocess.call(f"docker pull {tag}", shell=True)
|
||||||
# Run the Triton server and capture the container ID
|
# Run the Triton server and capture the container ID
|
||||||
container_id = (
|
container_id = (
|
||||||
subprocess.check_output(
|
subprocess.check_output(
|
||||||
f"docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
|
f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
|
||||||
shell=True,
|
shell=True,
|
||||||
)
|
)
|
||||||
.decode("utf-8")
|
.decode("utf-8")
|
||||||
|
|
@ -215,7 +224,7 @@ Setting up [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/) wit
|
||||||
|
|
||||||
container_id = (
|
container_id = (
|
||||||
subprocess.check_output(
|
subprocess.check_output(
|
||||||
f"docker run -d --rm -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
|
f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
|
||||||
shell=True,
|
shell=True,
|
||||||
)
|
)
|
||||||
.decode("utf-8")
|
.decode("utf-8")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue