ultralytics 8.0.195 NVIDIA Triton Inference Server support (#5257)

Co-authored-by: TheConstant3 <46416203+TheConstant3@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 2023-10-07 19:26:35 +02:00 committed by GitHub
parent 40e3923cfc
commit c7aa83da31
21 changed files with 349 additions and 98 deletions

@@ -15,7 +15,7 @@ from ultralytics import RTDETR, YOLO
 from ultralytics.cfg import TASK2DATA
 from ultralytics.data.build import load_inference_source
 from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_PATH, LINUX, MACOS, ONLINE, ROOT, WEIGHTS_DIR, WINDOWS,
-                               is_dir_writeable)
+                               checks, is_dir_writeable)
 from ultralytics.utils.downloads import download
 from ultralytics.utils.torch_utils import TORCH_1_9
@@ -343,17 +343,14 @@ def test_utils_init():
 def test_utils_checks():
-    from ultralytics.utils.checks import (check_imgsz, check_imshow, check_requirements, check_version,
-                                          check_yolov5u_filename, git_describe, print_args)
-    check_yolov5u_filename('yolov5n.pt')
-    # check_imshow(warn=True)
-    git_describe(ROOT)
-    check_requirements()  # check requirements.txt
-    check_imgsz([600, 600], max_dim=1)
-    check_imshow()
-    check_version('ultralytics', '8.0.0')
-    print_args()
+    checks.check_yolov5u_filename('yolov5n.pt')
+    checks.git_describe(ROOT)
+    checks.check_requirements()  # check requirements.txt
+    checks.check_imgsz([600, 600], max_dim=1)
+    checks.check_imshow()
+    checks.check_version('ultralytics', '8.0.0')
+    checks.print_args()
+    # checks.check_imshow(warn=True)

 def test_utils_benchmarks():
@@ -451,3 +448,53 @@ def test_hub():
     export_fmts_hub()
     logout()
     smart_request('GET', 'http://github.com', progress=True)
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(not ONLINE, reason='environment is offline')
+def test_triton():
+    checks.check_requirements('tritonclient[all]')
+
+    import subprocess
+    import time
+
+    from tritonclient.http import InferenceServerClient  # noqa
+
+    # Create variables
+    model_name = 'yolo'
+    triton_repo_path = TMP / 'triton_repo'
+    triton_model_path = triton_repo_path / model_name
+
+    # Export model to ONNX
+    f = YOLO(MODEL).export(format='onnx', dynamic=True)
+
+    # Prepare Triton repo
+    (triton_model_path / '1').mkdir(parents=True, exist_ok=True)
+    Path(f).rename(triton_model_path / '1' / 'model.onnx')
+    (triton_model_path / 'config.pbtxt').touch()
+
+    # Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
+    tag = 'nvcr.io/nvidia/tritonserver:23.09-py3'  # 6.4 GB
+
+    # Pull the image
+    subprocess.call(f'docker pull {tag}', shell=True)
+
+    # Run the Triton server and capture the container ID
+    container_id = subprocess.check_output(
+        f'docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models',
+        shell=True).decode('utf-8').strip()
+
+    # Wait for the Triton server to start
+    triton_client = InferenceServerClient(url='localhost:8000', verbose=False, ssl=False)
+
+    # Wait until model is ready
+    for _ in range(10):
+        with contextlib.suppress(Exception):
+            assert triton_client.is_model_ready(model_name)
+            break
+        time.sleep(1)
+
+    # Check Triton inference
+    YOLO(f'http://localhost:8000/{model_name}', 'detect')(SOURCE)  # exported model inference
+
+    # Kill and remove the container at the end of the test
+    subprocess.call(f'docker kill {container_id}', shell=True)
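
Outside the test suite, the workflow this commit enables can be summarized in a minimal standalone sketch: export a YOLO model to ONNX, lay it out as a Triton model repository, and run detection through the new http:// model loader. The weight file name, repository path, source image, and the assumption that a Triton container is already serving the repository on localhost:8000 are illustrative; only the YOLO(f'http://localhost:8000/{model_name}', 'detect') call pattern comes directly from the test above.

from pathlib import Path

from ultralytics import YOLO

model_name = 'yolo'                  # assumed model name, matching the test
repo = Path('/tmp/triton_repo')      # assumed local Triton model repository
(repo / model_name / '1').mkdir(parents=True, exist_ok=True)

onnx_file = YOLO('yolov8n.pt').export(format='onnx', dynamic=True)  # export weights to ONNX
Path(onnx_file).rename(repo / model_name / '1' / 'model.onnx')      # place the model in the repository
(repo / model_name / 'config.pbtxt').touch()                        # empty config; Triton can auto-complete it for ONNX models

# Assumes a Triton server is already running against this repository, e.g.:
#   docker run -d --rm -v /tmp/triton_repo:/models -p 8000:8000 \
#       nvcr.io/nvidia/tritonserver:23.09-py3 tritonserver --model-repository=/models
results = YOLO(f'http://localhost:8000/{model_name}', 'detect')('path/to/image.jpg')  # inference via Triton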