From 5af8a5c0fb6812cf8e9ac26aa79d488eb6f1e5a9 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 1 Oct 2024 11:53:11 +0200
Subject: [PATCH] `ultralytics 8.3.2` fix AMP checks with `imgsz=256` (#16583)

---
 tests/test_cuda.py            | 8 ++++++++
 ultralytics/__init__.py       | 2 +-
 ultralytics/utils/__init__.py | 1 +
 ultralytics/utils/checks.py   | 5 +++--
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 0b3429d0..3b08edc6 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -10,6 +10,7 @@ from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODEL, SOURCE
 from ultralytics import YOLO
 from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
 from ultralytics.utils import ASSETS, WEIGHTS_DIR
+from ultralytics.utils.checks import check_amp
 
 
 def test_checks():
@@ -18,6 +19,13 @@ def test_checks():
     assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
 
 
+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
+def test_amp():
+    """Test AMP training checks."""
+    model = YOLO("yolo11n.pt").model.cuda()
+    assert check_amp(model)
+
+
 @pytest.mark.slow
 @pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability")
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index daff29f8..f3d639ad 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.3.1"
+__version__ = "8.3.2"
 
 import os
 
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 02610b88..e122d4b5 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -111,6 +111,7 @@ torch.set_printoptions(linewidth=320, precision=4, profile="default")
 np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format})  # format short g, %precision=5
 cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
 os.environ["NUMEXPR_MAX_THREADS"] = str(NUM_THREADS)  # NumExpr max threads
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # for deterministic training to avoid CUDA warning
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # suppress verbose TF compiler warnings in Colab
 os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR"  # suppress "NNPACK.cpp could not initialize NNPACK" warnings
 os.environ["KINETO_LOG_LEVEL"] = "5"  # suppress verbose PyTorch profiler output when computing FLOPs
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 383c8562..85eccf67 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -657,9 +657,10 @@ def check_amp(model):
     def amp_allclose(m, im):
         """All close FP32 vs AMP results."""
         batch = [im] * 8
-        a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # FP32 inference
+        imgsz = max(256, int(model.stride.max() * 4))  # max stride P5-32 and P6-64
+        a = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # FP32 inference
         with autocast(enabled=True):
-            b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # AMP inference
+            b = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # AMP inference
         del m
         return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)  # close to 0.5 absolute tolerance
 