ultralytics 8.3.2 fix AMP checks with imgsz=256 (#16583)

Glenn Jocher, 2024-10-01 11:53:11 +02:00 (committed by GitHub)
parent c327b0aae1
commit 5af8a5c0fb
4 changed files with 13 additions and 3 deletions


@@ -10,6 +10,7 @@ from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODEL, SOURCE
 from ultralytics import YOLO
 from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
 from ultralytics.utils import ASSETS, WEIGHTS_DIR
+from ultralytics.utils.checks import check_amp


 def test_checks():
@@ -18,6 +19,13 @@ def test_checks():
     assert torch.cuda.device_count() == CUDA_DEVICE_COUNT


+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
+def test_amp():
+    """Test AMP training checks."""
+    model = YOLO("yolo11n.pt").model.cuda()
+    assert check_amp(model)
+
+
 @pytest.mark.slow
 @pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability")
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
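
The new test calls check_amp directly on a CUDA device. As a rough standalone sketch (assuming a CUDA GPU and the yolo11n.pt weights are available), the same check can be run outside pytest:

from ultralytics import YOLO
from ultralytics.utils.checks import check_amp

model = YOLO("yolo11n.pt").model.cuda()  # check_amp expects the underlying nn.Module on the GPU
print(check_amp(model))  # True means FP32 and AMP outputs agree, so AMP training is considered safe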


@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.3.1"
+__version__ = "8.3.2"

 import os


@@ -111,6 +111,7 @@ torch.set_printoptions(linewidth=320, precision=4, profile="default")
 np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format})  # format short g, %precision=5
 cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
 os.environ["NUMEXPR_MAX_THREADS"] = str(NUM_THREADS)  # NumExpr max threads
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # for deterministic training to avoid CUDA warning
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # suppress verbose TF compiler warnings in Colab
 os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR"  # suppress "NNPACK.cpp could not initialize NNPACK" warnings
 os.environ["KINETO_LOG_LEVEL"] = "5"  # suppress verbose PyTorch profiler output when computing FLOPs


@@ -657,9 +657,10 @@ def check_amp(model):
     def amp_allclose(m, im):
         """All close FP32 vs AMP results."""
         batch = [im] * 8
-        a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # FP32 inference
+        imgsz = max(256, int(model.stride.max() * 4))  # max stride P5-32 and P6-64
+        a = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # FP32 inference
         with autocast(enabled=True):
-            b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # AMP inference
+            b = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # AMP inference
         del m
         return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)  # close to 0.5 absolute tolerance
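
A worked example (illustrative only, not part of the diff) of the new image-size rule: the AMP check now uses an image size of at least 256 px and at least four times the model's largest stride, which keeps it a multiple of the stride for both P5 (max stride 32) and P6 (max stride 64) heads instead of the previous hard-coded 128:

for max_stride in (32, 64):  # P5 and P6 YOLO detection heads
    imgsz = max(256, int(max_stride * 4))
    print(f"max stride {max_stride} -> imgsz {imgsz}")  # both resolve to 256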