ultralytics 8.3.2 fix AMP checks with imgsz=256 (#16583)

Glenn Jocher, 2024-10-01 11:53:11 +02:00 (committed by GitHub)
parent c327b0aae1
commit 5af8a5c0fb
4 changed files with 13 additions and 3 deletions


@@ -10,6 +10,7 @@ from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODEL, SOURCE
 from ultralytics import YOLO
 from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
 from ultralytics.utils import ASSETS, WEIGHTS_DIR
+from ultralytics.utils.checks import check_amp


 def test_checks():
@@ -18,6 +19,13 @@ def test_checks():
     assert torch.cuda.device_count() == CUDA_DEVICE_COUNT


+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
+def test_amp():
+    """Test AMP training checks."""
+    model = YOLO("yolo11n.pt").model.cuda()
+    assert check_amp(model)
+
+
 @pytest.mark.slow
 @pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability")
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
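
The new test calls check_amp directly on a CUDA device. As a rough standalone sketch (assuming a CUDA GPU and the yolo11n.pt weights are available), the same check can be run outside pytest:

from ultralytics import YOLO
from ultralytics.utils.checks import check_amp

model = YOLO("yolo11n.pt").model.cuda()  # check_amp expects the underlying nn.Module on the GPU
print(check_amp(model))  # True means FP32 and AMP outputs agree, so AMP training is considered safe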


@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.3.1"
+__version__ = "8.3.2"

 import os


@@ -111,6 +111,7 @@ torch.set_printoptions(linewidth=320, precision=4, profile="default")
 np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format})  # format short g, %precision=5
 cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
 os.environ["NUMEXPR_MAX_THREADS"] = str(NUM_THREADS)  # NumExpr max threads
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # for deterministic training to avoid CUDA warning
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # suppress verbose TF compiler warnings in Colab
 os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR"  # suppress "NNPACK.cpp could not initialize NNPACK" warnings
 os.environ["KINETO_LOG_LEVEL"] = "5"  # suppress verbose PyTorch profiler output when computing FLOPs


@@ -657,9 +657,10 @@ def check_amp(model):
     def amp_allclose(m, im):
         """All close FP32 vs AMP results."""
         batch = [im] * 8
-        a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # FP32 inference
+        imgsz = max(256, int(model.stride.max() * 4))  # max stride P5-32 and P6-64
+        a = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # FP32 inference
         with autocast(enabled=True):
-            b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data  # AMP inference
+            b = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data  # AMP inference
         del m
         return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)  # close to 0.5 absolute tolerance
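
A worked example (illustrative only, not part of the diff) of the new image-size rule: the AMP check now uses an image size of at least 256 px and at least four times the model's largest stride, which keeps it a multiple of the stride for both P5 (max stride 32) and P6 (max stride 64) heads instead of the previous hard-coded 128:

for max_stride in (32, 64):  # P5 and P6 YOLO detection heads
    imgsz = max(256, int(max_stride * 4))
    print(f"max stride {max_stride} -> imgsz {imgsz}")  # both resolve to 256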