
    ΧgQ              !          d dl mZmZmZmZ d dlZd dlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddgZ G d de          Zd	d
e de de
 dz   e_        	 	 	 	 	 	 	 d$dee         dee         dee         dee         dee         dee         dee         dedee         dededededededef dZd Zdee         dee         dee         dee         dee         dee         dedededededededefd Zdee         dee         dee         dee         dee         dee         dedededededededefd!Zdee         dee         dee         dee         dee         dee         dedededededededed"dfd#ZdS )%    )castListOptionalUnionN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            self._need_device_dtype_check_for_fused = True
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")

        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=group["fused"]),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )
  %z1F     e!	 	    c                 J   t                                          |           d }| j        D ]Z}|                    dd            |                    dd           |                    dd           |                    dd           }[t	          | j                                                  }t          |          dk    ot          j	        |d         d                   }|s?|D ]>}t          j
        t          |d                   t          |          	          |d<   =d S d S )
Nr"   r   Fr   r   r   r*   r%   r)   )r1   __setstate__r5   
setdefaultlistr6   valueslenr7   	is_tensorr9   floatr   )r>   r6   r   r@   state_valuesstep_is_tensorsrC   s          rD   rG   zAdagrad.__setstate__`   s@   U### & 	4 	4EY---Z///-u555$$Wd33EEDJ--//00l++q0 
eoOF#7
 7
  	!  !L!F)$$,=u,M,M,M  &			 	 rE   c                 ~    | j         D ]4}|d         D ])}| j        |         }|d                                          *5d S )Nr   r,   )r5   r6   share_memory_)r>   r@   rA   r6   s       rD   share_memoryzAdagrad.share_memoryu   sZ    & 	- 	-E8_ - -
1e**,,,,-	- 	-rE   c                    d\  }}|d         D ]}|j         |d         r)t          | dd          rt          |d           d| _        ||j         j        z  }|t          j        |          z  }|                    |           |                    |j                    | j        |         }	|                    |	d                    |                    |	d	                    ||fS )
N)FFr   r   r4   T)cuda_unsupportedFr,   r*   )	gradgetattrr
   r4   	is_sparser7   r:   appendr6   )
r>   r@   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexrA   r6   s
             rD   _init_groupzAdagrad._init_group{   s    '3$x 	2 	2Av!> Dg8' ' D
 2!dKKKK>CD;16#33u/222 ''***QV$$$
1!!%,///""5=111++rE   c                    d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }|                     |||||          \  }}	t	          |||||d         |d         |d         |d         ||d         |d         |d         |	|d	         t          | d
d          t          | dd                     |S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r!   r"   r   r   r   
grad_scale	found_inf)r   r   r   r!   r^   r"   r   r   r_   r   rb   rc   )r7   enable_gradr5   r`   r   rW   )
r>   closurelossr@   rZ   r[   r\   r]   r^   r_   s
             rD   r*   zAdagrad.step   s`    "$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & 	 	E-/"$E')J(*K+/+;+;'
K, ,(O[  ;">2z*%L /i(z*$%56'Gn"4t<<!$T::!    & s   /33)r   r   r   r   r   NN)__name__
__module____qualname__r   r   rM   r   r   boolr2   rG   rS   r`   r   r*   __classcell__)rC   s   @rD   r   r      sJ        $(+,"&D $ $D D DD %- D 	D
 D $)D D $D D D ~D D D D D DL    *- - -, , ,* "* * * "!* * * * *rE   a[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    """
    + rf"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html
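
    Example:
        >>> # xdoctest: +SKIP (``model``, ``loss_fn``, ``input``, ``target`` are user code)
        >>> optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-2)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()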

    """
)


def adagrad(
    params: List[Tensor],
    grads: List[Tensor],
    state_sums: List[Tensor],
    state_steps: List[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    # kwonly args with defaults are not supported by functions compiled with torchscript issue #70627
    # setting these as kwargs for now as functional API is compiled by torch/distributed/optim
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Respect when the user inputs False/True for foreach or fused. We only want to
    # change the default when neither have been user-specified. Note that we default
    # to foreach and pass False to use_fused: we want to give the fused impl bake-in
    # time before making it the default, even if it is typically faster.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: List[Tensor], grads: List[Tensor], state_sums: List[Tensor],
    state_steps: List[Tensor], grad_scale: Optional[Tensor], found_inf: Optional[Tensor],
    *, lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
):
    assert grad_scale is None and found_inf is None
    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # update step
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            grad = grad.coalesce()  # the update is non-linear so indices must be unique
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)


def _multi_tensor_adagrad(
    params: List[Tensor], grads: List[Tensor], state_sums: List[Tensor],
    state_steps: List[Tensor], grad_scale: Optional[Tensor], found_inf: Optional[Tensor],
    *, lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"
    assert grad_scale is None and found_inf is None

    # Foreach functions will throw errors if given empty lists
    if len(params) == 0:
        return

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(List[Tensor], device_params_)
        device_grads = cast(List[Tensor], device_grads_)
        device_state_sums = cast(List[Tensor], device_state_sums_)
        device_state_steps = cast(List[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )

        if device_has_sparse_grad:
            # Sparse gradients have no foreach kernels; fall back per tensor.
            _single_tensor_adagrad(
                device_params, device_grads, device_state_sums, device_state_steps,
                lr=lr, weight_decay=weight_decay, lr_decay=lr_decay, eps=eps,
                has_sparse_grad=True, maximize=maximize,
                differentiable=differentiable, has_complex=has_complex,
                grad_scale=grad_scale, found_inf=found_inf,
            )
            continue

        # Handle complex parameters as views over their real representation
        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        # Update steps. If steps are on CPU, foreach will fall back to the slow
        # path, which is a for-loop calling t.add(1) over and over; wrapping 1
        # into a tensor once here is cheaper. The alpha is required to assure
        # we go to the right overload.
        if not torch._utils.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            # Re-use the intermediate memory (device_grads) already allocated for maximize
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay)
            for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Again, re-use the intermediate memory (device_grads) already allocated
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)

        torch._foreach_addcdiv_(device_params, numerator, std)


def _fused_adagrad(
    params: List[Tensor], grads: List[Tensor], state_sums: List[Tensor],
    state_steps: List[Tensor], grad_scale: Optional[Tensor], found_inf: Optional[Tensor],
    *, lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")

    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (device_params_, device_grads_, device_state_sums_, device_state_steps_),
        _,
    ) in grouped_tensors.items():
        device_params = cast(List[Tensor], device_params_)
        device_grads = cast(List[Tensor], device_grads_)
        device_state_sums = cast(List[Tensor], device_state_sums_)
        device_state_steps = cast(List[Tensor], device_state_steps_)

        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params,
            device_grads,
            device_state_sums,
            device_state_steps,
            lr=lr,
            lr_decay=lr_decay,
            weight_decay=weight_decay,
            eps=eps,
            maximize=maximize,
            grad_scale=device_grad_scale,
            found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )
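

# A minimal sketch of driving the functional API directly, bypassing the
# ``Adagrad`` class (illustrative; the lists below stand in for real optimizer
# state, and ``w.detach()`` shares storage with ``w`` so the update lands on
# the original tensor):
#
#     import torch
#     from torch.optim.adagrad import adagrad
#
#     w = torch.zeros(3, requires_grad=True)
#     (w - torch.ones(3)).pow(2).sum().backward()
#     adagrad(
#         [w.detach()],         # params, updated in place
#         [w.grad],             # grads
#         [torch.zeros(3)],     # state_sums
#         [torch.tensor(0.0)],  # state_steps (must be tensors)
#         lr=0.1, weight_decay=0.0, lr_decay=0.0, eps=1e-10, maximize=False,
#     )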