""" AdamW Optimizer
Impl copied from PyTorch master

NOTE: Builtin optim.AdamW is used by the factory, this impl only serves as a Python based reference, will be removed
someday
"""
import math

import torch
from torch.optim.optimizer import Optimizer


class AdamW(Optimizer):
    r"""Implements AdamW algorithm.

    The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.
    The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay coefficient (default: 1e-2)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=1e-2, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad)
        super(AdamW, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AdamW, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                # Perform stepweight decay (decoupled from the gradient-based update)
                p.data.mul_(1 - group['lr'] * group['weight_decay'])

                # Perform optimization step
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
                amsgrad = group['amsgrad']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # Decay the first and second moment running average coefficients
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                else:
                    denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])

                step_size = group['lr'] / bias_correction1

                p.addcdiv_(exp_avg, denom, value=-step_size)

        return loss