ultralytics 8.0.195 NVIDIA Triton Inference Server support (#5257)

Co-authored-by: TheConstant3 <46416203+TheConstant3@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 2023-10-07 19:26:35 +02:00 committed by GitHub
parent 40e3923cfc
commit c7aa83da31
21 changed files with 349 additions and 98 deletions

@@ -15,7 +15,7 @@ from ultralytics import RTDETR, YOLO
 from ultralytics.cfg import TASK2DATA
 from ultralytics.data.build import load_inference_source
 from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_PATH, LINUX, MACOS, ONLINE, ROOT, WEIGHTS_DIR, WINDOWS,
-                               is_dir_writeable)
+                               checks, is_dir_writeable)
 from ultralytics.utils.downloads import download
 from ultralytics.utils.torch_utils import TORCH_1_9
@@ -343,17 +343,14 @@ def test_utils_init():
 def test_utils_checks():
-    from ultralytics.utils.checks import (check_imgsz, check_imshow, check_requirements, check_version,
-                                          check_yolov5u_filename, git_describe, print_args)
-    check_yolov5u_filename('yolov5n.pt')
-    # check_imshow(warn=True)
-    git_describe(ROOT)
-    check_requirements()  # check requirements.txt
-    check_imgsz([600, 600], max_dim=1)
-    check_imshow()
-    check_version('ultralytics', '8.0.0')
-    print_args()
+    checks.check_yolov5u_filename('yolov5n.pt')
+    checks.git_describe(ROOT)
+    checks.check_requirements()  # check requirements.txt
+    checks.check_imgsz([600, 600], max_dim=1)
+    checks.check_imshow()
+    checks.check_version('ultralytics', '8.0.0')
+    checks.print_args()
+    # checks.check_imshow(warn=True)

 def test_utils_benchmarks():
@@ -451,3 +448,53 @@ def test_hub():
     export_fmts_hub()
     logout()
     smart_request('GET', 'http://github.com', progress=True)
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(not ONLINE, reason='environment is offline')
+def test_triton():
+    checks.check_requirements('tritonclient[all]')
+
+    import subprocess
+    import time
+
+    from tritonclient.http import InferenceServerClient  # noqa
+
+    # Create variables
+    model_name = 'yolo'
+    triton_repo_path = TMP / 'triton_repo'
+    triton_model_path = triton_repo_path / model_name
+
+    # Export model to ONNX
+    f = YOLO(MODEL).export(format='onnx', dynamic=True)
+
+    # Prepare Triton repo
+    (triton_model_path / '1').mkdir(parents=True, exist_ok=True)
+    Path(f).rename(triton_model_path / '1' / 'model.onnx')
+    (triton_model_path / 'config.pbtxt').touch()
+
+    # Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
+    tag = 'nvcr.io/nvidia/tritonserver:23.09-py3'  # 6.4 GB
+
+    # Pull the image
+    subprocess.call(f'docker pull {tag}', shell=True)
+
+    # Run the Triton server and capture the container ID
+    container_id = subprocess.check_output(
+        f'docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models',
+        shell=True).decode('utf-8').strip()
+
+    # Wait for the Triton server to start
+    triton_client = InferenceServerClient(url='localhost:8000', verbose=False, ssl=False)
+
+    # Wait until model is ready
+    for _ in range(10):
+        with contextlib.suppress(Exception):
+            assert triton_client.is_model_ready(model_name)
+            break
+        time.sleep(1)
+
+    # Check Triton inference
+    YOLO(f'http://localhost:8000/{model_name}', 'detect')(SOURCE)  # exported model inference
+
+    # Kill and remove the container at the end of the test
+    subprocess.call(f'docker kill {container_id}', shell=True)
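
Outside the test suite, the workflow this commit enables can be summarized in a minimal standalone sketch: export a YOLO model to ONNX, lay it out as a Triton model repository, and run detection through the new http:// model loader. The weight file name, repository path, source image, and the assumption that a Triton container is already serving the repository on localhost:8000 are illustrative; only the YOLO(f'http://localhost:8000/{model_name}', 'detect') call pattern comes directly from the test above.

from pathlib import Path

from ultralytics import YOLO

model_name = 'yolo'                  # assumed model name, matching the test
repo = Path('/tmp/triton_repo')      # assumed local Triton model repository
(repo / model_name / '1').mkdir(parents=True, exist_ok=True)

onnx_file = YOLO('yolov8n.pt').export(format='onnx', dynamic=True)  # export weights to ONNX
Path(onnx_file).rename(repo / model_name / '1' / 'model.onnx')      # place the model in the repository
(repo / model_name / 'config.pbtxt').touch()                        # empty config; Triton can auto-complete it for ONNX models

# Assumes a Triton server is already running against this repository, e.g.:
#   docker run -d --rm -v /tmp/triton_repo:/models -p 8000:8000 \
#       nvcr.io/nvidia/tritonserver:23.09-py3 tritonserver --model-repository=/models
results = YOLO(f'http://localhost:8000/{model_name}', 'detect')('path/to/image.jpg')  # inference via Triton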