
    קg                         U d dl mZmZmZmZ d dlZd dlmc mZ	 d dlm
Z
 g Zee         ed<   ej        j         G d d                      ZdS )    )DictListOptionalTupleN)Tensor__all__c                       e Zd Z	 	 	 	 	 	 	 	 	 ddee         dedeeef         d	ed
edededededefdZdede	e         fdZ
dee	e                  fdZdS )_FunctionalAdamWMbP?g?g+?:0yE>{Gz?Fparamslrbetasepsweight_decayamsgradmaximizeforeachfused_allow_empty_param_listc                    d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d|d                    d|k    st          d	|           |||d         |d         |d
| _        || _        || _        || _        |	| _        t          j                            t          t          j
        t          t          t          j
        f         f         i           | _        t          |          dk    r|
st          d          d|i| _        d S )N        zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r   r   beta1beta2r   z%optimizer got an empty parameter listr   )
ValueErrordefaultsr   r   r   r   torchjitannotater   r   strstatelenparam_group)selfr   r   r   r   r   r   r   r   r   r   s              d/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/distributed/optim/functional_adamw.py__init__z_FunctionalAdamW.__init__   s    byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKK 1X1X(
 
  
Y''U\4U\@Q;R-R(SUWXX
v;;!$;DEEE %f-    paramgradc                 &   g }g }g }g }g }g }t          j        |          }	|*|                    |           |                    |           || j        vri | j        |<   | j        |         }
t          j        d          |
d<   t          j        |t           j                  |
d<   t          j        |t           j                  |
d<   | j        r#t          j        |t           j                  |
d<   | j        |         }
|                    |
d                    |                    |
d                    | j        r|                    |
d                    |                    |
d                    t          j                    5  t          j
        ||||||| j        | j        | j        d         | j        d         | j        d	         | j        d
         | j        d         | j        | j        d d |	           d d d            d S # 1 swxY w Y   d S )Nr   stepmemory_formatexp_avg
exp_avg_sqmax_exp_avg_sqr   r   r   r   r   r   r   r   r   r   r   r   r   r   
grad_scale	found_infhas_complex)r    
is_complexappendr$   tensor
zeros_likepreserve_formatr   no_gradFadamwr   r   r   r   )r'   r+   r,   params_with_gradgradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr7   r$   s              r(   
step_paramz_FunctionalAdamW.step_paramC   st   $&&u--##E***LL
"" "DJuJu%E!L--E&M$/U%:     E) #("2U%:# # #E, | */*:)>+ + +&' 
5!i()))5.///< 	<""5)9#:;;;5=)))]__ 	 	G mG,mG,=&!]>:M%(j'%   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A2HH
H
	gradientsc                 >   | j         d         }g }g }g }g }g }g }t          |          t          |          k    r6t          ddt          |           dz   dt          |           z             d}	t          | j         d         |          D ]q\  }
}|h|	t	          j        |
          z  }	|                    |
           |                    |           |
| j        vri | j        |
<   | j        |
         }t	          j        d          |d<   t	          j	        |
t          j
        	          |d
<   t	          j	        |
t          j
        	          |d<   | j        r#t	          j	        |
t          j
        	          |d<   | j        |
         }|                    |d
                    |                    |d                    | j        r|                    |d                    |                    |d                    st	          j                    5  t          j        ||||||| j        | j        | j        d         | j        d         | j        d         | j        d         | j        d         | j        | j        d d |	           d d d            d S # 1 swxY w Y   d S )Nr   zEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: Fr   r.   r/   r1   r2   r3   r   r   r   r   r   r4   )r&   r%   r   zipr    r8   r9   r$   r:   r;   r<   r   r=   r>   r?   r   r   r   r   )r'   rG   r   r@   rA   rB   rC   rD   rE   r7   r+   gradientr$   s                r(   r.   z_FunctionalAdamW.step   s   !(+$&v;;#i..((W3CKK33347s9~~778   "4#3H#=yII  	2  	2OE8#u/666 ''...X&&&
**(*DJu% Ju-E$)L$5$5E&M','7U-B( ( (E)$ +0*:U-B+ + +E,' | 272B!1F3 3 3./ 
5)i 0111""5#6777< D#**51A+BCCC""5=111]__ 	 	G mG,mG,=&!]>:M%(j'%   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A2JJJN)	r   r   r   r   FFFFF)__name__
__module____qualname__r   r   floatr   boolr)   r   rF   r.    r*   r(   r
   r
      s       
 %1"(-*. *.V*. *. UE\"	*.
 *. *. *. *. *. *. "&*. *. *. *.X; ;hv.> ; ; ; ;zGd8F#34 G G G G G Gr*   r
   )typingr   r   r   r   r    torch.optim._functionaloptim_functionalr>   r   r   r#   __annotations__r!   scriptr
   rP   r*   r(   <module>rW      s    . . . . . . . . . . . . .  # # # # # # # # #       c    q q q q q q q q q qr*   