
    Ng                     B    d Z ddlZddlZddlmZ  G d de          ZdS )zRAdam Optimizer.
Implementation lifted from: https://github.com/LiyuanLucasLiu/RAdam
Paper: `On the Variance of the Adaptive Learning Rate and Beyond` - https://arxiv.org/abs/1908.03265
    N)	Optimizerc                   \     e Zd Zd	 fd	Z fdZ ej                    d
d            Z xZS )RAdamMbP?g?g+?:0yE>r   c           
          t          ||||d t          d          D                       }t          t          |                               ||           d S )Nc                     g | ]}g d S ))NNN ).0_s     L/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/optim/radam.py
<listcomp>z"RAdam.__init__.<locals>.<listcomp>   s    :::1&&&:::    
   )lrbetasepsweight_decaybuffer)dictrangesuperr   __init__)selfparamsr   r   r   r   defaults	__class__s          r   r   zRAdam.__init__   s_    Cl::b		:::< < < 	eT##FH55555r   c                 X    t          t          |                               |           d S N)r   r   __setstate__)r   stater   s     r   r!   zRAdam.__setstate__   s'    eT''.....r   Nc                    d }|5t          j                    5   |            }d d d            n# 1 swxY w Y   | j        D ]}|d         D ]}|j        |j                                        }|j        rt          d          |                                }| j        |         }t          |          dk    r4d|d<   t          j	        |          |d<   t          j	        |          |d<   n<|d         
                    |          |d<   |d         
                    |          |d<   |d         |d         }	}|d         \  }
}|	                    |                              ||d|z
  	           |                    |
                              |d|
z
  
           |dxx         dz  cc<   |d         t          |d         dz                     }|d         |d         k    r|d         |d         }}n|d         |d<   ||d         z  }dd|z
  z  dz
  }|d|d         z  |z  d|z
  z  z
  }||d<   |dk    rN|d         t          j        d|z
  |dz
  z  |dz
  z  |dz
  z  |z  |z  |dz
  z            z  d|
|d         z  z
  z  }n|d         d|
|d         z  z
  z  }||d<   |d         dk    r'|                    ||d          |d         z  
           |dk    rG|	                                                    |d                   }|                    ||| 	           n|                    || 
           |                    |           |S )Nr   z'RAdam does not support sparse gradientsr   stepexp_avg
exp_avg_sqr      )value)alphar   r         r      r   r   )torchenable_gradparam_groupsgradfloat	is_sparseRuntimeErrorr"   len
zeros_liketype_asmul_addcmul_add_intmathsqrtaddcdiv_copy_)r   closurelossgrouppr0   p_fp32r"   r%   r&   beta1beta2bufferednum_sma	step_sizebeta2_tnum_sma_maxdenoms                     r   r$   z
RAdam.step   s   "$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & ;	  ;	 E8_ 9  9 6>v||~~> R&'PQQQ
1u::??$%E&M','7'?'?E)$*/*:6*B*BE,''',Y'7'?'?'G'GE)$*/*=*E*Ef*M*ME,'&+I&6l8K$W~u&&//d!e)/LLLU##((QY(???f" ?3uV}r/A+B+BC=HQK//)1!hqkYGG"'-HQK#uV}4G"#q5y/A"5K)Af,=,G1w;,WWG")HQK !||$)$K$)[$q[*-81_>$q[*,34 (( ,7?<3= 3= %= ABEUSY]DZ@Z	%\		 %*$K1uf7M3M$N	"+HQK(A--KKu^/D.DuT{.RKSSS a<<&OO--225<@@EOOGU9*OEEEEKK	zK:::s9 v s   /33)r   r   r   r   r    )	__name__
__module____qualname__r   r!   r-   no_gradr$   __classcell__)r   s   @r   r   r   
   s        6 6 6 6 6 6/ / / / / U]__C C C _C C C C Cr   r   )__doc__r;   r-   torch.optim.optimizerr   r   r   r   r   <module>rS      ss       + + + + + +O O O O OI O O O O Or   