
    NgQ0                        d Z ddlZddlmZmZ ddlZddlmZ  G d dej        j                  Z		 	 ddee         d	ee         d
ee         dee         dee         dee
         de
dededededede
ddfdZdee         d	ee         d
ee         dee         dee         dededededede
de
fdZdee         d	ee         d
ee         dee         dee         dededededede
de
fdZdS )z NAdamW Optimizer

Based on simplified algorithm in https://github.com/mlcommons/algorithmic-efficiency/tree/main/baselines/nadamw

Added multi-tensor (foreach) path.
    N)ListOptional)Tensorc                        e Zd ZdZ	 	 	 	 	 	 	 dded	ee         d
ef fdZ fdZ ej	                    dd            Z
 xZS )NAdamWa?  Implements NAdamW algorithm.

      See Table 1 in https://arxiv.org/abs/1910.05446 for the implementation of
      the NAdam algorithm (there is also a comment in the code which highlights
      the only difference of NAdamW and AdamW).
      For further details regarding the algorithm we refer to
      `Decoupled Weight Decay Regularization`_.

      Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay coefficient (default: 1e-2)
      .. _Decoupled Weight Decay Regularization:
          https://arxiv.org/abs/1711.05101
      .. _On the Convergence of Adam and Beyond:
          https://openreview.net/forum?id=ryQu7f-RZ
    MbP?g?g+?:0yE>{Gz?FNmaximizeforeach
capturablec	           	         d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d|d                    d|k    st          d	|           t          |||||||
          }	t                                          ||	           d S )N        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )lrbetasepsweight_decayr   r   r   )
ValueErrordictsuper__init__)selfparamsr   r   r   r   r   r   r   defaults	__class__s             M/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/optim/nadamw.pyr   zNAdamW.__init__'   s.    byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKK%!
 
 
 	*****    c                 b   t                                          |           t          | j                                                  }t          |          dk    ot          j        |d         d                   }|s/|D ].}t          j        t          |d                             |d<   -d S d S )Nr   step)
r   __setstate__liststatevalueslentorch	is_tensortensorfloat)r   r%   state_valuesstep_is_tensorsr   s        r   r#   zNAdamW.__setstate__G   s    U###DJ--//00l++q0 %eoOF#7% 7% 	;! ; ;!Lqy)9)9::&			; 	;; ;r    c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]~}g }g }g }g }g }|d         \  }	}
|d         D ]#}|j        |                    |           |j        j        rt          d          |                    |j                   | j        |         }t          |          dk    r]t          j
        d          |d<   t          j        |t          j                  |d	<   t          j        |t          j                  |d
<   |                    |d	                    |                    |d
                    |                    |d                    %t          ||||||	|
|d         |d         |d         |d         |d                    |S )zPerforms a single optimization step.

            Args:
              closure (callable, optional): A closure that reevaluates the model
                  and returns the loss.
        Nr   r   z(NAdamW does not support sparse gradientsr   r   r"   )memory_formatexp_avg
exp_avg_sqr   r   r   r   r   beta1beta2r   r   r   r   r   ) _cuda_graph_capture_health_checkr(   enable_gradparam_groupsgradappend	is_sparseRuntimeErrorr%   r'   r*   
zeros_likepreserve_formatnadamw)r   closurelossgroupparams_with_gradgradsexp_avgsexp_avg_sqsstate_stepsr4   r5   pr%   s                r   r"   zNAdamW.stepP   s?    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & +	 +	E!EHKK >LE58_ 2 26> ''***6# S&'QRRRQV$$$
1 u::??$)L$4$4E&M','7I^'_'_'_E)$*/*:1ELa*b*b*bE,'i 0111""5#6777""5=1111 ;">2%Lz* .     s   AA
A)r   r	   r
   r   FNFN)__name__
__module____qualname____doc__boolr   r   r#   r(   no_gradr"   __classcell__)r   s   @r   r   r      s         4 "&*$+ + + d^+ + + + + + +@; ; ; ; ; U]__; ; ; _; ; ; ; ;r    r   Fr   rD   rE   rF   rG   r   r   r4   r5   r   r   r   r   returnc                    t          d |D                       st          d          |d}|r&t          j                                        st
          }nt          } || |||||||	|
|||           dS )zcFunctional API that performs NAdamW algorithm computation.
      See NAdamW class for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rI   )
isinstancer(   r   ).0ts     r   	<genexpr>znadamw.<locals>.<genexpr>   s.      @@qz!U\**@@@@@@r    zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNTr3   )allr<   r(   jitis_scripting_multi_tensor_nadamw_single_tensor_nadamw)r   rD   rE   rF   rG   r   r   r4   r5   r   r   r   r   funcs                 r   r?   r?      s    ( @@K@@@@@ "!" " 	"  %uy--// %#$D!     r    c                   t          |           D ]/\  }}|
s||         n||          }||         }||         }||         }|dz  }|                    d||z  z
             |                    |                              |d|z
             |                    |                              ||d|z
             |r|}dt	          j        ||          z
  }dt	          j        ||          z
  }||z  }|                                }|                                }|                    |                              |d|z
            }|                                ||z  z                      |	|z            }|	                    ||           |
                                }d||z  z
  }d||z  z
  }||z  }t          j        |          }|                    |                              |d|z
            }|                                |z                      |	          }|	                    |||            1d S )Nr   r   alpha)value)	enumeratemul_add_addcmul_r(   pownegsqrtmuladdcdiv_itemmath)r   rD   rE   rF   rG   r4   r5   r   r   r   r   r   iparamr9   r1   r2   step_tr"   bias_correction1bias_correction2	step_sizestep_size_negbias_correction2_sqrtdenoms                            r   r\   r\      sd     f%% /= /=5'6uQxxeAhY1+ ^
Q 	! 	

2\))*** 	U  QY 777''d!e)'DDD 	=D  !59UD#9#99 59UD#9#99--I%MMOOM$4$9$9$;$;! kk%((--d!e)-DDG__&&*?-*OPVVWZ]jWjkkENN7E****;;==D 5D=0 5D=0--I$(I.>$?$?! kk%((--d!e)-DDG__&&)>>DDSIIENN7E)N<<<<_/= /=r    c                   t          |           dk    rd S |r1t          d t          | |          D                       s
J d            |
r!t          j        t          |                    }d |D             }d |D             }d |D             }d | D             } t          j        |d           t          j        | d|z  z
             t          j        |           t          j        ||dz
  	           t          j        |           t          j        |||dz
             |rfd
|D             }fd|D             }t          j	        |d           t          j	        |d           t          j
        |           t          j
        |           t          j        |          }t          j        |           t          j
        |           t          j        |          }t          j        |          }t          j        ||dz
  	           t          j        |          }t          j        |t          j        ||                     t          j        ||	          }t          j        |           t          j        ||          }t          j        | ||           d S fd|D             }fd|D             }fd|D             }d |D             }t          j        |          }t          j        ||dz
  	           t          j        |          }t          j        ||           t          j        ||	          }t          j        | |||           d S )Nr   c              3   8   K   | ]\  }}|j         o|j         V  d S rI   )is_cuda)rU   rH   r"   s      r   rW   z'_multi_tensor_nadamw.<locals>.<genexpr>  s@       
 
+21dAI&$,
 
 
 
 
 
r    z@If capturable=True, params and state_steps must be CUDA tensors.c                 b    g | ],}t          j        |          rt          j        |          n|-S  r(   
is_complexview_as_realrU   xs     r   
<listcomp>z(_multi_tensor_nadamw.<locals>.<listcomp>  s8    PPPQe&6q&9&9@U"""qPPPr    c                 b    g | ],}t          j        |          rt          j        |          n|-S rz   r{   r~   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>  s8    VVV)9!)<)<C"1%%%!VVVr    c                 b    g | ],}t          j        |          rt          j        |          n|-S rz   r{   r~   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>  s8    \\\1E,<Q,?,?F5%a(((Q\\\r    c                 b    g | ],}t          j        |          rt          j        |          n|-S rz   r{   r~   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>  s8    RRRau'7':':Ae ###RRRr    r   r_   c                 :    g | ]}t          j        |          S rz   r(   rf   rU   r"   r4   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>/  %    KKKtEIeT22KKKr    c                 :    g | ]}t          j        |          S rz   r   rU   r"   r5   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>0  r   r    c                 D    g | ]}d |                                 z  z
  S r   rk   r   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>M  +    MMMA 44MMMr    c                 D    g | ]}d |                                 z  z
  S r   r   r   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>N  r   r    c                      g | ]
}|z  d z  S )rz   )rU   bcr   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>P  s!    ???b2g^???r    c                 6    g | ]}t          j        |          S rz   )rl   rh   )rU   r   s     r   r   z(_multi_tensor_nadamw.<locals>.<listcomp>R  s      J J J22 J J Jr    )r'   rX   zipr(   _foreach_negtuple_foreach_add__foreach_mul__foreach_addcmul__foreach_sub__foreach_neg__foreach_div_foreach_reciprocal__foreach_sqrt_foreach_mul_foreach_div__foreach_add_foreach_addcdiv_)r   rD   rE   rF   rG   r4   r5   r   r   r   r   r   rp   rq   rr   rt   exp_avg_sq_sqrteps_over_step_sizeru   s        ```           r   r[   r[     s    6{{a N 
 
69&+6N6N
 
 
 
 
 	N 	NM	N 	N 	N  1"5<<00PP%PPPEVVXVVVH\\P[\\\KRR6RRRF 
Q''' 
B$5 5666 
%(((	%q5y9999	U+++	Kq5yAAA 0DKKKK{KKKKKKK{KKK,a000,a000,---,--- &'7<<	"9---I&&& % 34D E E %h66He1u9====-k::U/0EyQQ	
 	
 	
 #/	3??"#5666"?4FGG%88888MMMMMMMMMMMMMM????.>???	 J J9I J J J %h66He1u9====-k::O-BCCC"?C88%CCCCCr    )NF)rM   rl   typingr   r   r(   r   optim	Optimizerr   rN   r+   r?   r\   r[   rz   r    r   <module>r      s     ! ! ! ! ! ! ! !       } } } } }U[" } } }L #' - -V-F|- v,- &\	-
 &\- $- - - - - - - - 
- - - -`?=V?=F|?= v,?= &\	?=
 &\?= ?= ?= ?= ?= ?= ?= ?= ?= ?= ?=D\DV\DF|\D v,\D &\	\D
 &\\D \D \D \D \D \D \D \D \D \D \D \D \Dr    