
    Χg_f              &          d Z ddlmZmZmZmZmZ ddlZddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddgZ G d	 de          Zd
de de de
 de d	z   e_         dee         dee         dee         dee         dee         dee         dededededededededededef"dZdee         dee         dee         dee         dee         dee         dededededededededededef"d Z ee!          	 	 	 	 	 	 d%dee         dee         dee         dee         dee         dee         ded#ee         dedededededededededef$d$            ZdS )&z'Implementation for the NAdam algorithm.    )castListOptionalTupleUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_stack_if_compiling_use_grad_for_differentiable_view_as_real	OptimizerParamsTNAdamnadamc                        e Zd Z	 	 	 	 	 	 ddddddd	ed
eeef         deeef         dedededede	e         dededef fdZ
 fdZd Zedd            Z xZS )r   Mb`?g?g+?:0yE>r   Mbp?FN)foreachmaximize
capturabledifferentiableparamslrbetasepsweight_decaymomentum_decaydecoupled_weight_decayr   r    r!   r"   c                j   t          |t                    r'|                                dk    rt          d          d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d	|d                    d|k    st          d
|           d|k    st          d|           t	          |||||||	||
|
  
        }t                                          ||           d S )Nr	   zTensor lr must be 1-element        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0: z#Invalid beta parameter at index 1: zInvalid weight_decay value: zInvalid momentum_decay value: )
r$   r%   r&   r'   r(   r)   r    r   r!   r"   )
isinstancer   numel
ValueErrordictsuper__init__)selfr#   r$   r%   r&   r'   r(   r)   r   r    r!   r"   defaults	__class__s                M/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/optim/nadam.pyr2   zNAdam.__init__   s    b&!! 	<bhhjjAoo:;;;byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKKn$$NnNNOOO%)#9!)
 
 
 	*****    c                    t                                          |           | j        D ]}|                    dd           |                    dd            |                    dd           |                    dd           |                    dd           |d         D ],}| j                            |g           }t          |          dk    rt          j        |d	                   sjt          |d	                   }|d         r(t          j
        |t                      |j        
          n!t          j
        |t                                |d	<   t          j        |d                   s]|d         }|d         r(t          j
        |t                      |j        
          n!t          j
        |t                                |d<   .d S )Nr    Fr   r!   r"   r)   r#   r   stepdtypedevicer;   
mu_product)r1   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r<   )r3   rB   grouppp_statestep_valmu_prod_valr5   s          r6   r?   zNAdam.__setstate__J   s   U###& 	 	EZ///Y---\5111-u5555u===8_  *..B//w<<1$$ ?76?;; #(#9#9
  %\2SEL (0A0C0CAH    "'h>O>Q>Q!R!R!R   !?7<+@AA &-l&;
  %\2VEL +3D3F3Fqx    "'kARATAT!U!U!U  -	 	r7   c                    d}|d         D ]}	|	j         |t          j        |	          z  }|                    |	           |	j         j        rt          d          |                    |	j                    | j        |	         }
t          |
          dk    r|d         r(t          j        dt                      |	j
                  n!t          j        dt                      	          |
d
<   |d         r(t          j        dt                      |	j
                  n!t          j        dt                      	          |
d<   t          j        |	t          j                  |
d<   t          j        |	t          j                  |
d<   |                    |
d                    |                    |
d                    |                    |
d                    |                    |
d
                    |S )NFr#   z'NAdam does not support sparse gradientsr   r!    r:   r+   r=   r9   r,   r>   )memory_formatexp_avg
exp_avg_sq)gradrE   
is_complexappend	is_sparseRuntimeErrorrB   rD   zerosr   r<   rH   ones
zeros_likepreserve_format)r3   rI   params_with_gradgradsexp_avgsexp_avg_sqsmu_productsstate_stepshas_complexrJ   rB   s              r6   _init_groupzNAdam._init_grouph   s    x $	2 $	2Av!u/222 ''***6# R&'PQQQQV$$$
1u::?? !.JB.?.A.A!(SSSS"\#5F5H5HIII &M !.J
2->-@-@RRRR"\#5F5H5HIII ,' (-'7)>( ( (E)$ +0*:)>+ + +E,' i 0111""5#6777""5#6777""5=111r7   c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }g }g }	t	          t
          t          t          f         |d                   \  }
}|                     |||||||	          }t          ||||||	|
||d         |d         |d         |d         |d         |d         |d	         |d
         |d         |           |S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr%   r$   r'   r(   r&   r    r)   r   r!   r"   )beta1beta2r$   r'   r(   r&   r    r)   r   r!   r"   rb   )	 _cuda_graph_capture_health_checkrE   enable_gradr@   r   r   rG   rc   r   )r3   closurelossrI   r\   r]   r^   r_   r`   ra   re   rf   rb   s                r6   r9   z
NAdam.step   s    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & &	 &	E-/"$E%'H(*K(*K(*KeUl 3U7^DDLE5**  K  ;">2$%56%Lz*',-E'Fi( .$%56'%    * s   AA
A)r   r   r   r   r   FN)__name__
__module____qualname__r   r   rG   r   r   boolr   r2   r?   rc   r   r9   __classcell__)r5   s   @r6   r   r      s;        $(%1 $',)+ #' $)+ )+ )+)+ %- )+ UE\"	)+
 )+ )+ )+ !%)+ $)+ )+ )+ )+ )+ )+ )+ )+ )+V    <0 0 0d "6 6 6 "!6 6 6 6 6r7   a  Implements NAdam algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma_t \text{ (lr)}, \: \beta_1,\beta_2 \text{ (betas)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm} \: \lambda \text{ (weight decay)}, \:\psi \text{ (momentum decay)}    \\
            &\hspace{13mm} \: \textit{decoupled\_weight\_decay}, \:\textit{maximize}             \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
                v_0 \leftarrow 0 \text{ ( second moment)}                                 \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}\textbf{if} \: \textit{maximize}:                                       \\
            &\hspace{10mm}g_t           \leftarrow   -\nabla_{\theta} f_t (\theta_{t-1})         \\
            &\hspace{5mm}\textbf{else}                                                           \\
            &\hspace{10mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})          \\
            &\hspace{5mm} \theta_t \leftarrow \theta_{t-1}                                       \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm}\textbf{if} \: \textit{decoupled\_weight\_decay}                       \\
            &\hspace{15mm} \theta_t \leftarrow \theta_{t-1} - \gamma \lambda \theta_{t-1}                    \\
            &\hspace{10mm}\textbf{else}                                                          \\
            &\hspace{15mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm} \mu_t \leftarrow \beta_1 \big(1 - \frac{1}{2}  0.96^{t \psi} \big)     \\
            &\hspace{5mm} \mu_{t+1} \leftarrow \beta_1 \big(1 - \frac{1}{2} 0.96^{(t+1)\psi}\big)\\
            &\hspace{5mm}m_t           \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t          \\
            &\hspace{5mm}v_t           \leftarrow   \beta_2 v_{t-1} + (1-\beta_2) g^2_t          \\
            &\hspace{5mm}\widehat{m_t} \leftarrow \mu_{t+1} m_t/(1-\prod_{i=1}^{t+1}\mu_i)\\[-1.ex]
            & \hspace{11mm} + (1-\mu_t) g_t /(1-\prod_{i=1}^{t} \mu_{i})                         \\
            &\hspace{5mm}\widehat{v_t} \leftarrow   v_t/\big(1-\beta_2^t \big)                   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_t - \gamma \widehat{m_t}/
                \big(\sqrt{\widehat{v_t}} + \epsilon \big)                                       \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Incorporating Nesterov Momentum into Adam`_.
    a  
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, Tensor, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        momentum_decay (float, optional): momentum momentum_decay (default: 4e-3)
        decoupled_weight_decay (bool, optional): whether to use decoupled weight
            decay as in AdamW to obtain NAdamW (default: False)
        z	
        z

    .. _Incorporating Nesterov Momentum into Adam:
        https://openreview.net/forum?id=OM0jvwB8jIp57ZJjtNEZ
    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101

    r#   r]   r^   r_   r`   ra   re   rf   r$   r'   r(   r&   r)   r    r!   r"   rb   c          
      
   t          |           D ]\  }}|s||         n||          }||         }||         }||         }||         }t          j        |          rPt          j        |          }t          j        |          }t          j        |          }t          j        |          }t          j                                        sZ|rXt                      }|j        j        |j        j        cxk    r|j        j        k    rn n|j        j        |v sJ d| d            |dz  }|r|}nt          |          }d||z  z
  }|	dk    r5|r|
                    d||	z  z
             n|                    ||	          }|ddd||
z  z  z  z
  z  }|ddd|dz   |
z  z  z  z
  z  }||z  }|                    |d|z
             |
                    |                              ||d|z
  	           |                    |                                          }|s|ri|                    |          }||z  }|| d|z
  z  d|z
  z  z  }|| |z  d|z
  z  z  }|                    ||           |                    ||           vt          |          |z  }|                    |           |                    ||| d|z
  z  dt          |          z
  z  	           |                    ||| |z  d|z
  z  	           d S )
NzVIf capturable=True, params, mu_products and state_steps must be on supported devices: .r	   r   alphar,         ?Q?)value)	enumeraterE   rT   view_as_real_utilsis_compilingr   r<   typer   mul_addlerp_addcmul_divsqrtaddcdiv_add_)r#   r]   r^   r_   r`   ra   re   rf   r$   r'   r(   r&   r)   r    r!   r"   rb   iparamrS   rQ   rR   r>   step_tcapturable_supported_devicesr9   bias_correction2mumu_nextdenommu_product_nexts                                  r6   _single_tensor_nadamr     s   ( f%% G G5'6uQxxeAhY1+ ^
 ^
QE"" 	8&u--E%d++D(11G+J77J |((** 	z 	+L+N+N(!Z%6%;QQQQv}?QQQQQQL%)EEEEI)EI I I FEE 	! 	&DDf%%Dud{?1% ;

1rL001111xx\x:: cC4D>,A#BCCD3$(n1L(M!NNO 	b
 	dAI&&&''d!e)'DDD/005577 	Z 	IIcNNE )72OB3#(+sZ/?@AD"w#2G!HIGNN4'''NN7E****(44w>OJJsOOONNeRC38$4j>T>T8T$U     NNsW}9N&O     KG Gr7   c                  
( t          |           dk    rd S |r
J d            t          j                                        sJ|rHt	          d          (t          (fdt          | ||          D                       sJ d( d            t          j        | |||||g          }|	                                D ]1\  \  }}}}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }|rt          ||||           |rt          j        |          }t          j                                        s9|d         j        r,t          j        |t          j        dd	
          d           nt          j        |d           |	dk    rO|rt          j        |d|	z  z
             n1|rt          j        |||	           nt          j        |||	          }t          j        ||dz
             t          j        |           t          j        |||dz
             t          j        |          }|r&t          j        |
          } t          j        d|           }!t          j        |!d           t          j        |!d           t          j        |!           t          j        | 
           t          j        d|           }"t          j        |"d           t          j        |"d           t          j        |"           ~ t          j        |          }#t          j        |#d           t          j        |#           t          j        |#           n,fd|D             }#
fd|D             }!
fd|D             }"t          j        ||!           t          j        ||#           t          j        ||           ~#|rt          j        |!d           t          j        |!           t          j        |d          }$t          j        |$           t          j        |!|$           |!}%~$t          j        ||"          }$t          j        |"           t          j        |$d           t          j        |"|$           |"}&~$t          j        |%|          }'t          j        |'|&|           t          j        ||'|           t?          fdt          ||!          D                       }%t?          fdt          ||"          D                       }&t          j        ||||%           t          j        ||||&           3d S )Nr   z#_foreach ops don't support autogradF)supports_xlac              3      K   | ]D\  }}}|j         j        |j         j        cxk    o|j         j        k    nc o|j         j        v V  Ed S rk   )r<   r|   ).0rJ   mpr9   r   s       r6   	<genexpr>z&_multi_tensor_nadam.<locals>.<genexpr>  sz       
 
 2t HMRY^????t{/????? >!==
 
 
 
 
 
r7   zWIf capturable=True, params, mu_products, and state_steps must be on supported devices: rr   r,   cpu)r<   rs   r	   rv   g      c                 @    g | ]}d t          |          z  z
  dz  S )r	   ru   r   )r   r9   rf   s     r6   
<listcomp>z'_multi_tensor_nadam.<locals>.<listcomp>  s=     $ $ $;?Uj....36$ $ $r7   c           	      L    g | ] }d ddt          |          z  z  z  z
  z  !S )r,   ru   rv   r   r   r9   re   r(   s     r6   r   z'_multi_tensor_nadam.<locals>.<listcomp>  sJ        sdz$/?/?./P&QRRS  r7   c           	      R    g | ]#}d ddt          |          dz   z  z  z  z
  z  $S )r,   ru   rv   r	   r   r   s     r6   r   z'_multi_tensor_nadam.<locals>.<listcomp>  sU         *T*:*:Q*>.)P QRRT  r7   c                 l    g | ]0\  }}t                    d |z
  z  d t          |          z
  z  dz  1S r,   r   )r   r>   r   r$   s      r6   r   z'_multi_tensor_nadam.<locals>.<listcomp>+  sT       &
B  ^^sRx0C*Z:P:P4PQUWW  r7   c                 l    g | ]0\  }}t                    |z  d t          |          |z  z
  z  dz  1S r   r   )r   r>   r   r$   s      r6   r   z'_multi_tensor_nadam.<locals>.<listcomp>1  sb        ,
G #2!"J!7!7'!AAC   r7   ) rD   rE   rz   r{   r   allzipr   "_group_tensors_by_device_and_dtypevaluesr   r   r   r   _foreach_negis_cpu_foreach_add_rH   _foreach_mul__foreach_add_foreach_lerp__foreach_addcmul__foreach_sqrt_foreach_mul_foreach_pow_foreach_sub__foreach_neg__foreach_sqrt__foreach_div__foreach_sub_foreach_addcdiv_r   ))r#   r]   r^   r_   r`   ra   re   rf   r$   r'   r(   r&   r)   r    r!   r"   rb   grouped_tensorsgrouped_params_grouped_grads_grouped_exp_avgs_grouped_exp_avg_sqs_grouped_mu_products_grouped_state_steps__grouped_paramsgrouped_gradsgrouped_exp_avgsgrouped_exp_avg_sqsgrouped_mu_productsgrouped_state_stepsexp_avg_sq_sqrtexponentmusmu_nextsbias_correction_sqrtr   step_size_gradsstep_size_expavg	numeratorr   s)         ``` `                             @r6   _multi_tensor_nadamr   u  s*   ( 6{{aDDDDDD <$$&& E: E'H(
 (
 (
$  
 
 
 
  #6;DD
 
 
 
 
 	E 	E E  fB  E  E  E		E 	E 	E  B	+{KH O ""$$e e 		 	d6lO<<T&\>::V.?@@"4<1EFF"4<1EFF"4<1EFF  	/?AT    	>!.}==M |((** 	8/B1/E/L 	8#U\#e%D%D%DC      3Q7771% #NA\8I4IJJJJ  '%~\     %*$6%~\% % %M
 	-}a%iHHH/777q5y	
 	
 	
  -.ABB
  #	)*=~NNH$T844CT***S)))U+++ .999)$99H$///#...%000 #(#5e=P#Q#Q  4c::: 4555 !56666$ $ $ $CV$ $ $     /  C     0  H 	/555O-ABBBOS111 ! 8	S)))R(((&':C@@E&&&U+++!O &':HEEE"--- s+++%000' *?MJJI#I/?AQRRR #NIOOOO1   *-.A3*G*G   O  3    033F/Q/Q  
  
  #   # 0/CS   Ge er7   )single_tensor_fnFr   c                   t          d |D                       st          d          t          d |D                       st          d          |t          | |	d          \  }}|r-t          j                                        rt          d          |r&t          j                                        st          }nt          } || |||||||||||||||	|
	           dS )
zpFunctional API that performs NAdam algorithm computation.

    See :class:`~torch.optim.NAdam` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rk   r-   rE   r   r   ts     r6   r   znadam.<locals>.<genexpr>`  .      @@qz!U\**@@@@@@r7   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsc              3   J   K   | ]}t          |t          j                  V  d S rk   r   r   s     r6   r   znadam.<locals>.<genexpr>e  r   r7   zPAPI has changed, `mu_products` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)re   rf   r$   r'   r(   r    r)   r&   r!   r"   rb   )r   rW   r   rE   jitis_scriptingr   r   )r#   r]   r^   r_   r`   ra   r)   r   r!   r"   rb   r    re   rf   r$   r'   r(   r&   r   funcs                       r6   r   r   D  sC   8 @@K@@@@@ 
^
 
 	
 @@K@@@@@ 
^
 
 	
 1Ne
 
 

7  U59))++ USTTT $uy--// $"#D!%5%#     r7   )FNFFFF)__doc__typingr   r   r   r   r   rE   r   	optimizerr
   r   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   rG   ro   r   r   r   rO   r7   r6   <module>r      s   / . 5 5 5 5 5 5 5 5 5 5 5 5 5 5                                      $ G
s s s s sI s s sn&N 
  
  
   
!  O> F[L[<[ 6l[ f	[
 f[ f[ [ [ 	[ [ [ 
[ ![ [  ![" #[$ %[ [ [ [|LLL<L 6lL f	L
 fL fL L L 	L L L 
L !L L  !L" #L$ %L L L L^  1EFFF $)" D DLD<D 6lD f	D
 fD fD !D d^D D D D D  !D" #D$ 	%D& 'D( )D* 
+D D D GFD D Dr7   