@torch.no_grad()
def step(self, closure=None):
    """Performs a single optimization step.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        with torch.enable_grad():
            loss = closure()

    for group in self.param_groups:
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        mu_products = []
        state_steps = []
        beta1, beta2 = group['betas']

        for p in group['params']:
            if p.grad is not None:
                params_with_grad.append(p)
                if p.grad.is_sparse:
                    raise RuntimeError('NAdam does not support sparse gradients')
                grads.append(p.grad)

                state = self.state[p]
                # Lazy state initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['mu_product'] = 1.
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                exp_avgs.append(state['exp_avg'])
                exp_avg_sqs.append(state['exp_avg_sq'])
                mu_products.append(state['mu_product'])

                # update the steps for each param group update
                state['step'] += 1
                # record the step after step update
                state_steps.append(state['step'])

        F.nadam(params_with_grad,
                grads,
                exp_avgs,
                exp_avg_sqs,
                mu_products,
                state_steps,
                beta1=beta1,
                beta2=beta2,
                lr=group['lr'],
                weight_decay=group['weight_decay'],
                momentum_decay=group['momentum_decay'],
                eps=group['eps'])

        # update mu_product
        for p, mu_product in zip(params_with_grad, mu_products):
            state = self.state[p]
            state['mu_product'] = state['mu_product'] * beta1 * \
                (1. - 0.5 * (0.96 ** (state['step'] * group['momentum_decay'])))

    return loss
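A minimal usage sketch of how this step method is typically driven through torch.optim.NAdam. The model, data, and hyperparameter values below are placeholders chosen for illustration, not part of the source above; the closure pattern shows why step() wraps the closure call in torch.enable_grad() even though the method itself runs under @torch.no_grad().

    import torch

    # Placeholder model and loss for illustration.
    model = torch.nn.Linear(10, 1)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.NAdam(model.parameters(), lr=2e-3,
                                  betas=(0.9, 0.999), momentum_decay=4e-3)

    inputs = torch.randn(32, 10)
    targets = torch.randn(32, 1)

    def closure():
        # Gradients are recomputed here; step() re-enables grad around this
        # call so loss.backward() works despite the @torch.no_grad() decorator.
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        return loss

    # Passing a closure makes step() return the recomputed loss.
    loss = optimizer.step(closure)

The closure argument is optional: a plain training loop can call loss.backward() itself and then invoke optimizer.step() with no arguments, in which case step() returns None.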