ultralytics 8.2.71 Multinode DDP training (#14879)

Co-authored-by: Haris Rehman <haris.rehman.cowlar@gmail.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
Haris Rehman 2024-08-01 20:31:03 +05:00 committed by GitHub
parent 16fc325308
commit 9c5d1a2451
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 9 additions and 4 deletions

View file

@ -48,11 +48,12 @@ TORCHVISION_0_18 = check_version(TORCHVISION_VERSION, "0.18.0")
def torch_distributed_zero_first(local_rank: int):
    """Ensures all processes in distributed training wait for the local master (rank 0) to complete a task first.

    The code before the ``yield`` runs on entry and the code after it runs on exit of the wrapped work:

    - Processes whose ``local_rank`` is neither -1 nor 0 block on a barrier *before* the wrapped work.
    - The process with ``local_rank == 0`` does the wrapped work first and joins the barrier *afterwards*,
      which releases the waiting processes.
    - When ``torch.distributed`` is unavailable or not initialized, or ``local_rank`` is -1, no barrier is
      entered and the wrapped work simply runs.

    NOTE(review): no decorator is visible in this view; presumably this generator is wrapped with
    ``contextlib.contextmanager`` where it is defined so it can be used in a ``with`` statement - confirm.

    Args:
        local_rank (int): Rank of this process on the local node. It is also passed to ``dist.barrier`` as the
            single entry of ``device_ids``.

    Yields:
        None: Control is handed to the wrapped work exactly once.
    """
    initialized = dist.is_available() and dist.is_initialized()
    if initialized and local_rank not in {-1, 0}:
        # Non-master local ranks wait here until local rank 0 reaches its barrier below.
        dist.barrier(device_ids=[local_rank])
    yield
    if initialized and local_rank == 0:
        # Exactly one barrier call on rank 0, pairing with the single call made by each waiting rank.
        dist.barrier(device_ids=[local_rank])
def smart_inference_mode():