def is_available() -> bool:
    """
    Return ``True`` if the distributed package is available.

    Otherwise, ``torch.distributed`` does not expose any other APIs.
    Currently, ``torch.distributed`` is available on Linux, MacOS and
    Windows. Set ``USE_DISTRIBUTED=1`` to enable it when building PyTorch
    from source. Currently, the default value is ``USE_DISTRIBUTED=1`` for
    Linux and Windows, ``USE_DISTRIBUTED=0`` for MacOS.

    Returns:
        bool: whether ``torch._C`` exposes the ``_c10d_init`` attribute.
    """
    # Availability is detected purely by the presence of the c10d
    # initializer on the C extension module; nothing is initialized here.
    return hasattr(torch._C, "_c10d_init")
# Run the c10d initializer once at import time; a falsy return value from it
# is treated as a fatal error.
if is_available() and not torch._C._c10d_init():
    raise RuntimeError("Failed to initialize torch.distributed")

# Custom Runtime Errors thrown from the distributed package
DistError = torch._C._DistError
DistBackendError = torch._C._DistBackendError
DistNetworkError = torch._C._DistNetworkError
DistStoreError = torch._C._DistStoreError

if is_available():
    from torch._C._distributed_c10d import (
        Store,
        FileStore,
        TCPStore,
        ProcessGroup as ProcessGroup,
        Backend as _Backend,
        PrefixStore,
        Reducer,
        Logger,
        BuiltinCommHookType,
        GradBucket,
        Work as _Work,
        _DEFAULT_FIRST_BUCKET_BYTES,
        _register_comm_hook,
        _register_builtin_comm_hook,
        _broadcast_coalesced,
        _compute_bucket_assignment_by_size,
        _verify_params_across_processes,
        _test_python_store,
        DebugLevel,
        get_debug_level,
        set_debug_level,
        set_debug_level_from_env,
        _make_nccl_premul_sum,
    )

    def breakpoint(rank: int = 0):
        """
        Set a breakpoint, but only on a single rank.

        All other ranks will wait for you to be done with the breakpoint
        before continuing: every rank, including the one that stopped, calls
        ``barrier()`` before returning.

        This calls the builtin ``breakpoint()`` under the hood, so you can
        customize it using the normal facilities, e.g., the
        ``PYTHONBREAKPOINT`` environment variable.

        Args:
            rank (int): Which rank to break on. Default: ``0``
        """
        # This function shadows the builtin of the same name inside this
        # module, so a bare ``breakpoint(header=...)`` here would call this
        # function again and fail with an unexpected-keyword TypeError.
        # Reach the real builtin explicitly instead.
        import builtins

        if get_rank() == rank:
            # This will be the case when your subprocess was created by
            # multiprocessing.Process, see
            # https://stackoverflow.com/questions/30134297/python-multiprocessing-stdin-input
            old_stdin = None
            if isinstance(sys.stdin, io.TextIOWrapper):
                old_stdin = sys.stdin
                # Temporarily read debugger input from file descriptor 0.
                sys.stdin = open(0)
            try:
                builtins.breakpoint(
                    header=(
                        "\n!!! ATTENTION !!!\n\n"
                        f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n"
                    )
                )
            finally:
                # Only undo the swap if we actually performed it above.
                if old_stdin is not None:
                    sys.stdin.close()
                    sys.stdin = old_stdin
        barrier()

    if sys.platform != "win32":
        from torch._C._distributed_c10d import (
            HashStore,
            _round_robin_process_groups,
        )

    from .distributed_c10d import *  # noqa: F403

    # Variables prefixed with underscore are not auto imported
    # See the comment in `distributed_c10d.py` above `_backend` on why we expose
    # this.
    from .distributed_c10d import (
        _all_gather_base,
        _reduce_scatter_base,
        _create_process_group_wrapper,
        _rank_not_in_group,
        _coalescing_manager,
        _CoalescingManager,
        _get_process_group_name,
    )

    from .rendezvous import (
        rendezvous,
        _create_store_from_options,
        register_rendezvous_handler,
    )

    from .remote_device import _remote_device

    set_debug_level_from_env()

else:
    # This stub is sufficient to get
    #   python test/test_public_bindings.py -k test_correct_module_names
    # working even when USE_DISTRIBUTED=0.  Feel free to add more
    # stubs as necessary.
    # We cannot define stubs directly because they confuse pyre

    class _ProcessGroupStub:
        pass

    sys.modules["torch.distributed"].ProcessGroup = _ProcessGroupStub  # type: ignore[attr-defined]
Docs
Access comprehensive developer documentation for PyTorch
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: Cookies Policy.