mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59111

Create a util function for initializing subgroups. By default, each subgroup contains all the ranks within a machine. This util function can be used by both local SGD and the SyncBatchNorm optimization.

Additionally, clang-format `distributed/__init__.py` after importing `_rank_not_in_group`, which is used by the unit test, and also clang-format `distributed_c10d.py`.

Note that this API does not accept another overall main group. Like the APEX API `create_syncbn_process_group` [here](https://nvidia.github.io/apex/_modules/apex/parallel.html), it always uses the global world size and should only be applied when CUDA is available.

Closes: https://github.com/pytorch/pytorch/issues/53962

ghstack-source-id: 130975027

Test Plan:
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups_group_size_exceeds_world_size
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups_world_size_not_divisible_by_group_size
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups_by_enumeration
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups_by_enumeration_input_rank_exceeds_world_size
buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_new_subgroups_overlap_not_allowed

Reviewed By: rohan-varma

Differential Revision: D28495672

fbshipit-source-id: fdcc405411dd409634eb51806ee0a320d1ecd4e0
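A minimal usage sketch of the subgroup util described in the summary. It assumes the util is exposed as `torch.distributed.new_subgroups()` / `new_subgroups_by_enumeration()` (as the linked PR and tests suggest), that the job is launched with one process per GPU via a launcher that sets `LOCAL_RANK`, and that NCCL/CUDA are available; treat it as illustrative rather than the PR's own example code.

import os
import torch
import torch.distributed as dist

# One process per GPU; LOCAL_RANK is set by launchers such as torchrun (assumption).
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)
dist.init_process_group(backend="nccl")

# By default, each subgroup contains all the ranks within one machine.
cur_subgroup, all_subgroups = dist.new_subgroups()

# All-reduce only across the ranks on this machine (e.g. a local SGD step).
t = torch.ones(1, device=local_rank)
dist.all_reduce(t, group=cur_subgroup)

# Subgroups can also be enumerated explicitly, e.g. two machines with 4 GPUs each:
# cur_subgroup, all_subgroups = dist.new_subgroups_by_enumeration(
#     [[0, 1, 2, 3], [4, 5, 6, 7]]
# )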
64 lines
1.7 KiB
Python
import os
import sys
from enum import Enum

import torch


def is_available() -> bool:
    """
    Returns ``True`` if the distributed package is available. Otherwise,
    ``torch.distributed`` does not expose any other APIs. Currently,
    ``torch.distributed`` is available on Linux, MacOS and Windows. Set
    ``USE_DISTRIBUTED=1`` to enable it when building PyTorch from source.
    Currently, the default value is ``USE_DISTRIBUTED=1`` for Linux and Windows,
    ``USE_DISTRIBUTED=0`` for MacOS.
    """
    return hasattr(torch._C, "_c10d_init")


if is_available() and not torch._C._c10d_init():
    raise RuntimeError("Failed to initialize torch.distributed")


if is_available():
    from torch._C._distributed_c10d import (
        Store,
        FileStore,
        TCPStore,
        ProcessGroup,
        PrefixStore,
        Reducer,
        Logger,
        BuiltinCommHookType,
        GradBucket,
        _DEFAULT_FIRST_BUCKET_BYTES,
        _register_comm_hook,
        _register_builtin_comm_hook,
        _broadcast_coalesced,
        _compute_bucket_assignment_by_size,
        _verify_model_across_ranks,
        _test_python_store,
        _DistributedDebugLevel,
        _get_debug_mode,
    )

    if sys.platform != "win32":
        from torch._C._distributed_c10d import (
            HashStore,
            _round_robin_process_groups,
        )

    from .distributed_c10d import *  # noqa: F403

    # Variables prefixed with underscore are not auto imported
    # See the comment in `distributed_c10d.py` above `_backend` on why we expose
    # this.

    from .distributed_c10d import (
        _backend,
        _all_gather_base,
        _reduce_scatter_base,
        _create_process_group_wrapper,
        _rank_not_in_group,
    )
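The docstring of `is_available()` above notes that when the distributed package is unavailable (for example a macOS build with the default ``USE_DISTRIBUTED=0``), `torch.distributed` exposes no other APIs. A minimal sketch of the guard pattern this implies for downstream code; the single-process fallback values are an assumption for illustration, not part of this file.

from typing import Tuple

import torch.distributed as dist


def get_rank_and_world_size() -> Tuple[int, int]:
    # is_available() is False when PyTorch was built without distributed
    # support; in that case none of the other c10d APIs exist, so it must
    # be checked before is_initialized().
    if dist.is_available() and dist.is_initialized():
        return dist.get_rank(), dist.get_world_size()
    # Single-process fallback (assumed default for this sketch).
    return 0, 1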