Fix HUB session with DDP training (#13103)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Laughing 2024-06-24 02:00:34 +08:00 committed by GitHub
parent 68720288d3
commit 169602442c
5 changed files with 11 additions and 5 deletions


@@ -43,8 +43,8 @@ TORCHVISION_0_13 = check_version(TORCHVISION_VERSION, "0.13.0")
 @contextmanager
 def torch_distributed_zero_first(local_rank: int):
-    """Decorator to make all processes in distributed training wait for each local_master to do something."""
-    initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
+    """Ensures all processes in distributed training wait for the local master (rank 0) to complete a task first."""
+    initialized = dist.is_available() and dist.is_initialized()
     if initialized and local_rank not in {-1, 0}:
         dist.barrier(device_ids=[local_rank])
     yield
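
For context, a minimal sketch of how this context manager is typically used during DDP setup: the local master (rank 0) performs an expensive one-time task while the other ranks block on the barrier until it completes, then proceed and hit the warm result. The `prepare_dataset` helper and the `LOCAL_RANK` handling below are illustrative assumptions, not part of this commit.

```python
import os

from ultralytics.utils.torch_utils import torch_distributed_zero_first

LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # -1 when not launched via torchrun


def get_dataset():
    """Build the dataset; under DDP, only the local master does the one-time work first."""
    with torch_distributed_zero_first(LOCAL_RANK):
        # Hypothetical helper: download/cache runs once on rank 0 while other
        # ranks wait at the barrier, then they reuse the cached result.
        dataset = prepare_dataset()
    return dataset
```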