# mypy: allow-untyped-defs
r"""Contains definitions of the methods used by the _BaseDataLoaderIter workers.

These need to be in global scope since Py2 doesn't support serializing
static methods.
"""

import os
import queue
import random
from dataclasses import dataclass
from typing import Optional, TYPE_CHECKING, Union

import torch
from torch._utils import ExceptionWrapper

from . import HAS_NUMPY, IS_WINDOWS, MP_STATUS_CHECK_INTERVAL, signal_handling


if TYPE_CHECKING:
    from torch.utils.data import Dataset

if IS_WINDOWS:
    import ctypes
    from ctypes.wintypes import BOOL, DWORD, HANDLE

    # On Windows, the parent ID of the worker process remains unchanged when the
    # manager process is gone, and the only way to check it through the OS is to
    # let the worker have a process handle of the manager and ask if the process
    # status has changed.
    class ManagerWatchdog:
        def __init__(self) -> None:
            self.manager_pid = os.getppid()

            # mypy cannot detect this code is windows only
            self.kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)  # type: ignore[attr-defined]
            self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD)
            self.kernel32.OpenProcess.restype = HANDLE
            self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD)
            self.kernel32.WaitForSingleObject.restype = DWORD

            # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx
            SYNCHRONIZE = 0x00100000
            self.manager_handle = self.kernel32.OpenProcess(
                SYNCHRONIZE, 0, self.manager_pid
            )

            if not self.manager_handle:
                raise ctypes.WinError(ctypes.get_last_error())  # type: ignore[attr-defined]

            self.manager_dead = False

        def is_alive(self):
            if not self.manager_dead:
                # Value obtained from https://msdn.microsoft.com/en-us/library/windows/desktop/ms687032.aspx
                self.manager_dead = (
                    self.kernel32.WaitForSingleObject(self.manager_handle, 0) == 0
                )
            return not self.manager_dead

else:

    class ManagerWatchdog:  # type: ignore[no-redef]
        def __init__(self) -> None:
            self.manager_pid = os.getppid()
            self.manager_dead = False

        def is_alive(self):
            if not self.manager_dead:
                self.manager_dead = os.getppid() != self.manager_pid
            return not self.manager_dead


_worker_info: Optional["WorkerInfo"] = None


class WorkerInfo:
    id: int
    num_workers: int
    seed: int
    dataset: "Dataset"
    __initialized = False

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.__keys = tuple(kwargs.keys())
        self.__initialized = True

    def __setattr__(self, key, val):
        if self.__initialized:
            raise RuntimeError(
                f"Cannot assign attributes to {self.__class__.__name__} objects"
            )
        return super().__setattr__(key, val)

    def __repr__(self):
        items = [f"{k}={getattr(self, k)}" for k in self.__keys]
        return f"{self.__class__.__name__}({', '.join(items)})"
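
# Illustrative sketch (not part of the original module): `WorkerInfo` populates
# itself from keyword arguments and then freezes, so worker code can read but
# never mutate it. `my_dataset` is a hypothetical placeholder.
#
#     info = WorkerInfo(id=0, num_workers=2, seed=1234, dataset=my_dataset)
#     info.num_workers  # 2
#     info.id = 1       # RuntimeError: Cannot assign attributes to WorkerInfo objects
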
def get_worker_info() -> Optional[WorkerInfo]:
    r"""Returns the information about the current
    :class:`~torch.utils.data.DataLoader` iterator worker process.

    When called in a worker, this returns an object guaranteed to have the
    following attributes:

    * :attr:`id`: the current worker id.
    * :attr:`num_workers`: the total number of workers.
    * :attr:`seed`: the random seed set for the current worker. This value is
      determined by the main process RNG and the worker id. See
      :class:`~torch.utils.data.DataLoader`'s documentation for more details.
    * :attr:`dataset`: the copy of the dataset object in **this** process. Note
      that this will be a different object in a different process than the one
      in the main process.

    When called in the main process, this returns ``None``.

    .. note::
       When used in a :attr:`worker_init_fn` passed over to
       :class:`~torch.utils.data.DataLoader`, this method can be useful to
       set up each worker process differently, for instance, using ``worker_id``
       to configure the ``dataset`` object to only read a specific fraction of a
       sharded dataset, or use ``seed`` to seed other libraries used in dataset
       code.
    """
    return _worker_info
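
# Illustrative sketch (not part of the original module): a hypothetical
# `worker_init_fn` that uses `get_worker_info()` to shard an iterable-style
# dataset across workers, as suggested in the note above. The dataset's
# `start`/`end` attributes are assumptions made for this example.
#
#     def worker_init_fn(worker_id):
#         info = torch.utils.data.get_worker_info()
#         ds = info.dataset  # this worker's copy of the dataset
#         per_worker = (ds.end - ds.start + info.num_workers - 1) // info.num_workers
#         ds.start = ds.start + info.id * per_worker
#         ds.end = min(ds.start + per_worker, ds.end)
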
r"""Dummy class used to signal the end of an IterableDataset"""@dataclass(frozen=True)class_IterableDatasetStopIteration:worker_id:intr"""Dummy class used to resume the fetching when worker reuse is enabled"""@dataclass(frozen=True)class_ResumeIteration:seed:Optional[int]=None# The function `_generate_state` is adapted from `numpy.random.SeedSequence`# from https://github.com/numpy/numpy/blob/main/numpy/random/bit_generator.pyx# It's MIT licensed, here is the copyright:# Copyright (c) 2015 Melissa E. O'Neill# Copyright (c) 2019 NumPy Developers## Permission is hereby granted, free of charge, to any person obtaining a copy# of this software and associated documentation files (the "Software"), to deal# in the Software without restriction, including without limitation the rights# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell# copies of the Software, and to permit persons to whom the Software is# furnished to do so, subject to the following conditions:## The above copyright notice and this permission notice shall be included in# all copies or substantial portions of the Software.## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE# SOFTWARE.# This function generates an array of int32 as the seed for# `numpy.random`, in order to prevent state collision due to same# seed and algorithm for `numpy.random` and `random` modules.# TODO: Implement `SeedSequence` like object for `torch.random`def_generate_state(base_seed,worker_id):INIT_A=0x43B0D7E5MULT_A=0x931E8875INIT_B=0x8B51F9DDMULT_B=0x58F38DEDMIX_MULT_L=0xCA01F9DDMIX_MULT_R=0x4973F715XSHIFT=4*8//2MASK32=0xFFFFFFFFentropy=[worker_id,base_seed&MASK32,base_seed>>32,0]pool=[0]*4hash_const_A=INIT_Adefhash(value):nonlocalhash_const_Avalue=(value^hash_const_A)&MASK32hash_const_A=(hash_const_A*MULT_A)&MASK32value=(value*hash_const_A)&MASK32value=(value^(value>>XSHIFT))&MASK32returnvaluedefmix(x,y):result_x=(MIX_MULT_L*x)&MASK32result_y=(MIX_MULT_R*y)&MASK32result=(result_x-result_y)&MASK32result=(result^(result>>XSHIFT))&MASK32returnresult# Add in the entropy to the pool.foriinrange(len(pool)):pool[i]=hash(entropy[i])# Mix all bits together so late bits can affect earlier bits.fori_srcinrange(len(pool)):fori_dstinrange(len(pool)):ifi_src!=i_dst:pool[i_dst]=mix(pool[i_dst],hash(pool[i_src]))hash_const_B=INIT_Bstate=[]fori_dstinrange(4):data_val=pool[i_dst]data_val=(data_val^hash_const_B)&MASK32hash_const_B=(hash_const_B*MULT_B)&MASK32data_val=(data_val*hash_const_B)&MASK32data_val=(data_val^(data_val>>XSHIFT))&MASK32state.append(data_val)returnstatedef_worker_loop(dataset_kind,dataset,index_queue,data_queue,done_event,auto_collation,collate_fn,drop_last,base_seed,init_fn,worker_id,num_workers,persistent_workers,shared_seed,):# See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for details on the# logic of this function.try:# Initialize C side signal handlers for SIGBUS and SIGSEGV. 
def _worker_loop(
    dataset_kind,
    dataset,
    index_queue,
    data_queue,
    done_event,
    auto_collation,
    collate_fn,
    drop_last,
    base_seed,
    init_fn,
    worker_id,
    num_workers,
    persistent_workers,
    shared_seed,
):
    # See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for details on the
    # logic of this function.

    try:
        # Initialize C side signal handlers for SIGBUS and SIGSEGV. Python signal
        # module's handlers are executed after Python returns from C low-level
        # handlers, likely when the same fatal signal had already happened
        # again.
        # https://docs.python.org/3/library/signal.html#execution-of-python-signal-handlers
        signal_handling._set_worker_signal_handlers()

        torch.multiprocessing._set_thread_name("pt_data_worker")

        torch.set_num_threads(1)
        seed = base_seed + worker_id
        random.seed(seed)
        torch.manual_seed(seed)
        if HAS_NUMPY:
            np_seed = _generate_state(base_seed, worker_id)
            import numpy as np

            np.random.seed(np_seed)

        from torch.utils.data import IterDataPipe
        from torch.utils.data.graph_settings import apply_random_seed

        shared_rng = torch.Generator()
        if isinstance(dataset, IterDataPipe):
            assert shared_seed is not None
            shared_rng.manual_seed(shared_seed)
            dataset = apply_random_seed(dataset, shared_rng)

        global _worker_info
        _worker_info = WorkerInfo(
            id=worker_id, num_workers=num_workers, seed=seed, dataset=dataset
        )

        from torch.utils.data import _DatasetKind

        init_exception = None

        try:
            if init_fn is not None:
                init_fn(worker_id)

            fetcher = _DatasetKind.create_fetcher(
                dataset_kind, dataset, auto_collation, collate_fn, drop_last
            )
        except Exception:
            init_exception = ExceptionWrapper(
                where=f"in DataLoader worker process {worker_id}"
            )

        # When using Iterable mode, some workers can exit earlier than others
        # due to the IterableDataset behaving differently for different workers.
        # When such things happen, an `_IterableDatasetStopIteration` object is
        # sent over to the main process with the ID of this worker, so that the
        # main process won't send more tasks to this worker, and will send
        # `None` to this worker to properly exit it.
        #
        # Note that we cannot set `done_event` from a worker as it is shared
        # among all processes. Instead, we set the `iteration_end` flag to
        # signify that the iterator is exhausted. When either `done_event` or
        # `iteration_end` is set, we skip all processing steps and just wait for
        # `None`.
        iteration_end = False

        watchdog = ManagerWatchdog()

        while watchdog.is_alive():
            try:
                r = index_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
            except queue.Empty:
                continue
            if isinstance(r, _ResumeIteration):
                # Acknowledge the main process
                data_queue.put((r, None))
                iteration_end = False

                if isinstance(dataset, IterDataPipe):
                    assert r.seed is not None
                    shared_rng.manual_seed(r.seed)
                    dataset = apply_random_seed(dataset, shared_rng)

                # Recreate the fetcher for worker-reuse policy
                fetcher = _DatasetKind.create_fetcher(
                    dataset_kind, dataset, auto_collation, collate_fn, drop_last
                )
                continue
            elif r is None:
                # Received the final signal
                assert done_event.is_set() or iteration_end
                break
            elif done_event.is_set() or iteration_end:
                # `done_event` is set. But I haven't received the final signal
                # (None) yet. I will keep continuing until I get it, and skip
                # the processing steps.
                continue
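            # At this point `r` is a regular task: an `(idx, index)` pair, where
            # `idx` lets the main process reorder results and `index` is what the
            # fetcher passes to the dataset (a list of indices when batching via
            # `auto_collation`, a single key otherwise).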
            idx, index = r
            data: Union[_IterableDatasetStopIteration, ExceptionWrapper]
            if init_exception is not None:
                data = init_exception
                init_exception = None
            else:
                try:
                    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
                except Exception as e:
                    if (
                        isinstance(e, StopIteration)
                        and dataset_kind == _DatasetKind.Iterable
                    ):
                        data = _IterableDatasetStopIteration(worker_id)
                        # Set `iteration_end`
                        #   (1) to save future `next(...)` calls, and
                        #   (2) to avoid sending multiple `_IterableDatasetStopIteration`s.
                        iteration_end = True
                    else:
                        # It is important that we don't store exc_info in a variable.
                        # `ExceptionWrapper` does the correct thing.
                        # See NOTE [ Python Traceback Reference Cycle Problem ]
                        data = ExceptionWrapper(
                            where=f"in DataLoader worker process {worker_id}"
                        )
            data_queue.put((idx, data))
            del data, idx, index, r  # save memory
    except KeyboardInterrupt:
        # Main process will raise KeyboardInterrupt anyways.
        pass
    if done_event.is_set():
        data_queue.cancel_join_thread()
        data_queue.close()
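
# Illustrative summary (not part of the original module) of the queue protocol
# `_worker_loop` implements, from the main process's point of view:
#
#     index_queue.put((0, [0, 1, 2, 3]))   # request task 0 with these indices
#     idx, data = data_queue.get()         # -> (0, <fetched data or ExceptionWrapper>)
#     index_queue.put(_ResumeIteration())  # restart a persistent worker between epochs
#     index_queue.put(None)                # final signal: the worker breaks out of its loop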