
    NgY                     >    d dl Z d dlZd dlmZ  G d de          ZdS )    N)	Optimizerc                   Z     e Zd ZdZ	 	 d
 fd	Z ej                    dd	            Z xZS )Nadama  Implements Nadam algorithm (a variant of Adam based on Nesterov momentum).

    It has been proposed in `Incorporating Nesterov Momentum into Adam`__.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        schedule_decay (float, optional): momentum schedule decay (default: 4e-3)

    __ http://cs229.stanford.edu/proj2015/054_report.pdf
    __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf

        Originally taken from: https://github.com/pytorch/pytorch/pull/1408
        NOTE: Has potential issues but does work well on some problems.
    Mb`?g?g+?:0yE>r   Mbp?c                     d|k    s"t          d                    |                    t          |||||          }t          t          |                               ||           d S )Ng        zInvalid learning rate: {})lrbetasepsweight_decayschedule_decay)
ValueErrorformatdictsuperr   __init__)	selfparamsr   r   r   r   r   defaults	__class__s	           L/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/optim/nadam.pyr   zNadam.__init__   st    byy8??CCDDD%)
 
 
 	eT##FH55555    Nc           	         d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}|d         D ]}|j        |j        }| j        |         }t          |          dk    r8d|d<   d|d<   t          j        |          |d<   t          j        |          |d<   |d         }|d	         }|d         |d         }
}	|d
         \  }}|d         }|dxx         dz  cc<   |d         }d||z  z
  }|d         dk    r|                    ||d                   }|ddd||z  z  z  z
  z  }|ddd|dz   |z  z  z  z
  z  }||z  }||z  |z  }||d<   |	                    |          	                    |d|z
             |
                    |          
                    ||d|z
             |
                                t          j        |          z  	                    |          }|                    |||d          d|z
  z  d|z
  z             |                    |	||d          |z  d|z
  z             	|S )zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   stepg      ?
m_scheduleexp_avg
exp_avg_sqr   r   r      r   )alphag      ?gQ?)valuer   )torchenable_gradparam_groupsgradstatelen
zeros_likeaddmul_add_addcmul_sqrtmathaddcdiv_)r   closurelossgrouppr&   r'   r   r   r   r   beta1beta2r   tbias_correction2momentum_cache_tmomentum_cache_t_1m_schedule_newm_schedule_nextdenoms                        r   r   z
Nadam.step+   s    "$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & '	m '	mE8_ &m &m6>v
1 u::??$%E&M*,E,'','7':':E)$*/*:1*=*=E,' #<0
!&'7!8&+I&6l8K$W~uElf"&M#$uz> (A--88AU>-B8CCD#(B^AS8T1U,U#V %*b3$AE^C[:\3].]%^"!+.>!>",/?"?BT"T&4l# U##((R%Z(@@@&&//d"u*/MMM#**TY7G-H-HHNNsSS

4uT{lbCS>S.TXZ]kXk.l
mmm

7E%+@R1RVX[jVj1k
llllM&mP s   /33)r   r   r   r   r	   )N)	__name__
__module____qualname____doc__r   r#   no_gradr   __classcell__)r   s   @r   r   r      sy         , AE046 6 6 6 6 6 U]__5 5 5 _5 5 5 5 5r   r   )r/   r#   torch.optim.optimizerr   r    r   r   <module>rF      sg      + + + + + +Z Z Z Z ZI Z Z Z Z Zr   