
    ΧgC                      x   d dl mZmZmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddgZ G d de          Zd	d
e de de de	 d	z   e_        dee         dee         dee         dee         dee         dededededededededefdZdee         dee         dee         dee         dee         dededededededededefdZ ee          	 	 	 	 	 d!dee         dee         dee         dee         dee         dee         dededededededededefd             ZdS )"    )castListOptionalTupleUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTAdamaxadamaxc                        e Zd Z	 	 	 	 	 dddddded	eeef         d
eeef         dededee	         de	de	de	f fdZ
 fdZd Zedd            Z xZS )r   Mb`?g?g+?:0yE>r   NF)maximizedifferentiable
capturableparamslrbetasepsweight_decayforeachr   r   r   c          
      6   t          |t                    r'|                                dk    rt          d          d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d	|d                    d|k    st          d
|           t	          ||||||||	          }
t                                          ||
           d S )Nr	   zTensor lr must be 1-element        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0: z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r!   r"   r#   r$   r%   r   r   r   )
isinstancer   numel
ValueErrordictsuper__init__)selfr    r!   r"   r#   r$   r%   r   r   r   defaults	__class__s              N/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/optim/adamax.pyr.   zAdamax.__init__   sb    b&!! 	<bhhjjAoo:;;;byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKK%)!	
 	
 	
 	*****    c                    t                                          |           | j        D ]}|                    dd            |                    dd           |                    dd           |                    dd           |d         D ]}| j                            |g           }t          |          dk    rt          j        |d                   sjt          |d                   }|d         r(t          j
        |t                      |j        	          n!t          j
        |t                      
          |d<   d S )Nr%   r   Fr   r   r    r   stepdtypedevicer7   )r-   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r8   )r/   r=   grouppp_statestep_valr1   s         r2   r:   zAdamax.__setstate__C   sO   U###& 	 	EY---Z///-u555\51118_ 
 
*..B//w<<1$$U_WV_-M-M$$WV_55H
 !.O$,=,?,?    #\(:K:M:MNNN FO	
	 	r3   c                    d}|d         D ]x}|j         |t          j        |          z  }|                    |           |j         j        rt          d          |                    |j                    | j        |         }	t          |	          dk    r|d         r(t          j        dt                      |j
                  n!t          j        dt                      	          |	d
<   t          j        |t          j                  |	d<   t          j        |t          j                  |	d<   |                    |	d                    |                    |	d                    |                    |	d
                    z|S )NFr    z(Adamax does not support sparse gradientsr   r    r6   r'   r9   r5   )memory_formatexp_avgexp_inf)gradr@   
is_complexappend	is_sparseRuntimeErrorr=   r?   zerosr   r8   rC   
zeros_likepreserve_format)
r/   rD   params_with_gradgradsexp_avgsexp_infsstate_stepshas_complexrE   r=   s
             r2   _init_groupzAdamax._init_groupV   s    x 	. 	.Av~5+A...K##A&&&v O"#MNNNLL   JqME 5zzQ \*FEK*;*=*=ahOOOOc1B1D1DEEE f
 $)#3U%:$ $ $i  $)#3U%:$ $ $i  OOE),---OOE),---uV}----r3   c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }g }|d         \  }	}
|d         }|d         }|d         }|d         }|d         }|d         }|d	         }|                     ||||||          }t          |||||||	|
|||||||
           |S )zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr"   r#   r!   r$   r%   r   r   r   )
r#   beta1beta2r!   r$   r%   r   r   r   rZ   ) _cuda_graph_capture_health_checkr@   enable_gradr;   r[   r   )r/   closurelossrD   rU   rV   rW   rX   rY   r]   r^   r#   r!   r$   r%   r   r   r   rZ   s                      r2   r5   zAdamax.stepy   s    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & $	 $	E-/"$E%'H%'H(*K >LE5,CtB 0LI&GZ(H"#34N|,J**'(K K  )!-%'    $ s   AA
A)r   r   r   r   NN)__name__
__module____qualname__r   r   rB   r   r   r   boolr.   r:   r[   r   r5   __classcell__)r1   s   @r2   r   r      s!        $(%1"&$+ $ $+ $+ $+$+ %- $+ UE\"	$+
 $+ $+ $$+ $+ $+ $+ $+ $+ $+ $+ $+L    &! ! !F "4 4 4 "!4 4 4 4 4r3   a  Implements Adamax algorithm (a variant of Adam based on infinity norm).

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \beta_1, \beta_2
                \text{ (betas)},\theta_0 \text{ (params)},f(\theta) \text{ (objective)},
                \: \lambda \text{ (weight decay)},                                                \\
            &\hspace{13mm}    \epsilon \text{ (epsilon)}                                          \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
                u_0 \leftarrow 0 \text{ ( infinity norm)}                                 \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}m_t      \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t               \\
            &\hspace{5mm}u_t      \leftarrow   \mathrm{max}(\beta_2 u_{t-1}, |g_{t}|+\epsilon)   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1} - \frac{\gamma m_t}{(1-\beta^t_1) u_t} \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
    a  
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, Tensor, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zd

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980

    r    rV   rW   rX   rY   r#   r]   r^   r!   r$   r   r   r   rZ   c       	         H   t          |           D ]\  }}||         }|
s|n| }||         }||         }||         }t          j                                        sF|rDt	                      }|j        j        |j        j        k    r|j        j        |v sJ d| d            |dz  }|	dk    r|                    ||	          }t          j        |          rPt          j	        |          }t          j	        |          }t          j	        |          }t          j	        |          }|
                    |d|z
             |sPt          j        |                    |          |                                                    |          |           nt          j        |                    |                              d          |                                                    |                              d          gd          }|                    t          j        |dd                     |r:||z  dz
  }|                    |           ||z  }|                    ||           ]d|t-          |          z  z
  }||z  }|                    ||| 	           d S )
NIIf capturable=True, params and state_steps must be on supported devices: .r	   r   alpha)outF)keepdim)value)	enumerater@   _utilsis_compilingr   r8   typeaddrN   view_as_reallerp_maximummul_absadd_cat	unsqueeze
unsqueeze_copy_amaxdiv_addcdiv_r   )r    rV   rW   rX   rY   r#   r]   r^   r!   r$   r   r   r   rZ   iparamrM   rK   rL   step_tcapturable_supported_devicesnorm_bufneg_bias_correctiondenombias_correctionclrs                             r2   _single_tensor_adamaxr      s   " f%% 69 695Qx#.tt$1+1+Q |((** 	{z 	{+L+N+N(!V]%777L%)EEEEz[wzzz FEE 	!188E866DE"" 	2&u--E%d++D(11G(11G 	dAI&&& 	BMU##

$$     ye$$..q11488::??33G3G3R3RST3U3UV H MM%*Xq%@@@AAA 	9 #(-!"3$$R(((11ENN7E****%:f+=+="==O&CNN7GC4N8888m69 69r3   c       	            |r
J d            t          |           dk    rd S t          j                                        sI|rGt	          d          t          fdt          | |          D                       sJ d d            t          j        | ||||g          }|	                                D ]\  \  }}}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }|rt          ||||           |
rt          j        |          }t          j                                        s9|d         j        r,t          j        |t          j        dd	
          d           nt          j        |d           |	dk    r1|
rt          j        |||	           nt          j        |||	          }t          j        ||dz
             t          j        ||           |
s|	dk    rt          j        |          }nt          j        |           t          j        ||           t          j        ||           |rlt          j        |          }t          j        |d           t          j        |           t          j        ||          }t          j        |||           pfd|D             }fd|D             }t          j        ||||           d S )Nz#_foreach ops don't support autogradr   F)supports_xlac              3   n   K   | ]/\  }}|j         j        |j         j        k    o|j         j        v V  0d S rc   )r8   rt   ).0rE   r5   r   s      r2   	<genexpr>z'_multi_tensor_adamax.<locals>.<genexpr>G  s]       
 
 4 HMT[-- >!==
 
 
 
 
 
r3   rj   rk   r(   cpu)r8   rl   r	   c                 :    g | ]}d t          |          z  z
  S )r	   r   )r   r5   r]   s     r2   
<listcomp>z(_multi_tensor_adamax.<locals>.<listcomp>  s8          26EZ----     r3   c                 :    g | ]}t                    |z  d z  S )r   )r   bcr!   s     r2   r   z(_multi_tensor_adamax.<locals>.<listcomp>  s)    OOO*R..2-3OOOr3   )r?   r@   rr   rs   r   allzipr   "_group_tensors_by_device_and_dtypevaluesr   r   r   r   _foreach_negis_cpu_foreach_add_rC   _foreach_add_foreach_lerp__foreach_mul__foreach_abs_foreach_abs__foreach_maximum__foreach_pow_foreach_sub__foreach_div__foreach_mul_foreach_addcdiv_)r    rV   rW   rX   rY   r#   r]   r^   r!   r$   r   r   r   rZ   grouped_tensorsgrouped_params_grouped_grads_grouped_exp_avgs_grouped_exp_infs_grouped_state_steps__grouped_paramsgrouped_gradsgrouped_exp_avgsgrouped_exp_infsgrouped_state_stepsbias_correctionsr   	step_sizer   s         ` `                    @r2   _multi_tensor_adamaxr   ,  s   " DDDDDD
6{{a <$$&& w: w'H(
 (
 (
$  
 
 
 
 v{33
 
 
 
 
 	w 	w wWsvvv		w 	w 	w  B	(K8 O ""$$I I 		 	d6lO<<T&\>::V.?@@V.?@@"4<1EFF 	/?AQ    	>!.}==M |((** 	8/B1/E/L 	8#U\#e%D%D%DC      3Q7771 #M>VVVVV % 2!>! ! !
 	-}a%iHHH 	,e444  	/LA--!.}==MM...M3/// 0-@@@  	$1%9LMM 0!444 0"555&'79IJJE#N4DeLLLL       :M      POOO>NOOOI# 02BI   OI Ir3   )single_tensor_fnFr%   c
                   t           j                                        s(t          d |D                       st	          d          |t          | |d          \  }}|r-t           j                                        rt	          d          |r&t           j                                        st          }nt          } || |||||
|||||||	|           dS )zrFunctional API that performs adamax algorithm computation.

    See :class:`~torch.optim.Adamax` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rc   )r)   r@   r   )r   ts     r2   r   zadamax.<locals>.<genexpr>  s?       3 3()
1el##3 3 3 3 3 3r3   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)	r#   r]   r^   r!   r$   r   r   rZ   r   )
r@   rr   rs   r   rQ   r   jitis_scriptingr   r   )r    rV   rW   rX   rY   r%   r   r   r   rZ   r#   r]   r^   r!   r$   r   funcs                    r2   r   r     s*   4 <$$&& 
s 3 3-83 3 3 0 0 
 ^
 
 	
 1Ne
 
 

7  U59))++ USTTT %uy--// %#$D!%     r3   )NFFFF)typingr   r   r   r   r   r@   r   	optimizerr
   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__rB   rg   r   r   r   rI   r3   r2   <module>r      s   6 5 5 5 5 5 5 5 5 5 5 5 5 5                                    " X
R R R R RY R R Rl4 
  
  
  
  5, bG9LG9<G9 6lG9 6l	G9
 fG9 
G9 G9 G9 	G9 G9 G9 G9 G9 G9 G9 G9 G9TmLm<m 6lm 6l	m
 fm 
m m m 	m m m m m m m m m`  1FGGG # < <L<<< 6l< 6l	<
 f< d^< < < < < 
< <  !<" 	#<$ %< < < HG< < <r3   