import math
from typing import Iterator, Optional, TypeVar

import torch
import torch.distributed as dist
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import Sampler

T_co = TypeVar("T_co", covariant=True)


class DistributedSampler(Sampler[T_co]):
    r"""Sampler that restricts data loading to a subset of the dataset.

    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such a case, each
    process can pass a :class:`~torch.utils.data.DistributedSampler` instance
    as a :class:`~torch.utils.data.DataLoader` sampler, and load a subset of
    the original dataset that is exclusive to it.

    .. note::
        The dataset is assumed to be of constant size, and any instance of it
        is assumed to always return the same elements in the same order.

    Args:
        dataset: Dataset used for sampling.
        num_replicas (int, optional): Number of processes participating in
            distributed training. By default, :attr:`world_size` is retrieved
            from the current distributed group.
        rank (int, optional): Rank of the current process within
            :attr:`num_replicas`. By default, :attr:`rank` is retrieved from
            the current distributed group.
        shuffle (bool, optional): If ``True`` (default), sampler will shuffle
            the indices.
        seed (int, optional): random seed used to shuffle the sampler if
            :attr:`shuffle=True`. This number should be identical across all
            processes in the distributed group. Default: ``0``.
        drop_last (bool, optional): if ``True``, then the sampler will drop the
            tail of the data to make it evenly divisible across the number of
            replicas. If ``False``, the sampler will add extra indices to make
            the data evenly divisible across the replicas. Default: ``False``.

    .. warning::
        In distributed mode, calling the :meth:`set_epoch` method at the
        beginning of each epoch **before** creating the :class:`DataLoader`
        iterator is necessary to make shuffling work properly across multiple
        epochs. Otherwise, the same ordering will always be used.

    Example::

        >>> # xdoctest: +SKIP
        >>> sampler = DistributedSampler(dataset) if is_distributed else None
        >>> loader = DataLoader(dataset, shuffle=(sampler is None),
        ...                     sampler=sampler)
        >>> for epoch in range(start_epoch, n_epochs):
        ...     if is_distributed:
        ...         sampler.set_epoch(epoch)
        ...     train(loader)
    """

    def __init__(self, dataset: Dataset, num_replicas: Optional[int] = None,
                 rank: Optional[int] = None, shuffle: bool = True,
                 seed: int = 0, drop_last: bool = False) -> None:
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        if rank >= num_replicas or rank < 0:
            raise ValueError(
                f"Invalid rank {rank}, rank should be in the interval [0, {num_replicas - 1}]")
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        self.drop_last = drop_last
        # If the dataset length is evenly divisible by # of replicas, then there
        # is no need to drop any data, since the dataset will be split equally.
        if self.drop_last and len(self.dataset) % self.num_replicas != 0:  # type: ignore[arg-type]
            # Split to nearest available length that is evenly divisible.
            # This is to ensure each rank receives the same amount of data when
            # using this Sampler.
            self.num_samples = math.ceil(
                (len(self.dataset) - self.num_replicas) / self.num_replicas  # type: ignore[arg-type]
            )
        else:
            self.num_samples = math.ceil(len(self.dataset) / self.num_replicas)  # type: ignore[arg-type]
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle
        self.seed = seed

    def __iter__(self) -> Iterator[T_co]:
        if self.shuffle:
            # deterministically shuffle based on epoch and seed
            g = torch.Generator()
            g.manual_seed(self.seed + self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()  # type: ignore[arg-type]
        else:
            indices = list(range(len(self.dataset)))  # type: ignore[arg-type]

        if not self.drop_last:
            # add extra samples to make it evenly divisible
            padding_size = self.total_size - len(indices)
            if padding_size <= len(indices):
                indices += indices[:padding_size]
            else:
                indices += (indices * math.ceil(padding_size / len(indices)))[:padding_size]
        else:
            # remove tail of data to make it evenly divisible.
            indices = indices[:self.total_size]
        assert len(indices) == self.total_size

        # subsample
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self) -> int:
        return self.num_samples

    def set_epoch(self, epoch: int) -> None:
        r"""
        Sets the epoch for this sampler.

        When :attr:`shuffle=True`, this ensures all replicas use a different
        random ordering for each epoch. Otherwise, the next iteration of this
        sampler will yield the same ordering.

        Args:
            epoch (int): Epoch number.
        """
        self.epoch = epoch
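The snippet below is not part of the module; it is a minimal, single-process sketch of how the padding and ``drop_last`` logic above partitions a toy 10-element dataset across three replicas. Passing ``num_replicas`` and ``rank`` explicitly means no process group has to be initialized, and a plain list stands in for a real dataset since only ``len()`` is exercised here.

    from torch.utils.data import DistributedSampler

    dataset = list(range(10))  # stand-in for a real Dataset; only len() matters here

    # drop_last=False (default): 10 indices are padded up to total_size == 12,
    # so every rank receives ceil(10 / 3) == 4 indices, and the first two
    # indices are reused as padding on ranks 1 and 2.
    for rank in range(3):
        sampler = DistributedSampler(dataset, num_replicas=3, rank=rank, shuffle=False)
        print(rank, list(sampler))
    # 0 [0, 3, 6, 9]
    # 1 [1, 4, 7, 0]
    # 2 [2, 5, 8, 1]

    # drop_last=True: the tail is dropped instead, so every rank receives
    # ceil((10 - 3) / 3) == 3 indices and index 9 is never visited.
    for rank in range(3):
        sampler = DistributedSampler(
            dataset, num_replicas=3, rank=rank, shuffle=False, drop_last=True
        )
        print(rank, list(sampler))
    # 0 [0, 3, 6]
    # 1 [1, 4, 7]
    # 2 [2, 5, 8]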
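Similarly, a short sketch (again not from the module) of why the ``set_epoch`` warning matters: with ``shuffle=True`` the permutation is seeded with ``seed + epoch``, so re-iterating the sampler without calling :meth:`set_epoch` replays the identical order.

    from torch.utils.data import DistributedSampler

    dataset = list(range(8))
    sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True, seed=0)

    print(list(sampler))  # epoch 0 ordering
    print(list(sampler))  # identical to the line above: the epoch was never advanced
    sampler.set_epoch(1)  # what the training loop is expected to do each epoch
    print(list(sampler))  # re-seeded with seed + 1, so (almost certainly) a different ordering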