
    Ng                     B    d Z ddlZ G d dej        j                  ZdS )z AdaHessian Optimizer

Lifted from https://github.com/davda54/ada-hessian/blob/master/ada_hessian.py
Originally licensed MIT, Copyright 2020, David Samuel
    Nc                        e Zd ZdZ	 	 d fd		Zed
             Zd Zd Z e	j
                    d             Z e	j
                    dd            Z xZS )
Adahessiana  
    Implements the AdaHessian algorithm from "ADAHESSIAN: An Adaptive Second OrderOptimizer for Machine Learning"

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining parameter groups
        lr (float, optional): learning rate (default: 0.1)
        betas ((float, float), optional): coefficients used for computing running averages of gradient and the
            squared hessian trace (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0.0)
        hessian_power (float, optional): exponent of the hessian trace (default: 1.0)
        update_each (int, optional): compute the hessian trace approximation only after *this* number of steps
            (to save time) (default: 1)
        n_samples (int, optional): how many times to sample `z` for the approximation of the hessian trace (default: 1)
    皙?g?g+?:0yE>              ?   Fc
                    d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d         cxk    rdk     sn t          d|d                    d|cxk    rdk    sn t          d	|           || _        || _        |	| _        d
| _        t          j                                        | j                  | _        t          |||||          }
t          t          |                               ||
           |                                 D ]}d|_        d| j        |         d<   d S )Nr   zInvalid learning rate: zInvalid epsilon value: r   r	   z#Invalid beta parameter at index 0: r
   z#Invalid beta parameter at index 1: zInvalid Hessian power value: i)lrbetasepsweight_decayhessian_powerhessian step)
ValueError	n_samplesupdate_eachavg_conv_kernelseedtorch	Generatormanual_seed	generatordictsuperr   __init__
get_paramshessstate)selfparamsr   r   r   r   r   r   r   r   defaultsp	__class__s               Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/optim/adahessian.pyr   zAdahessian.__init__   s   byy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNm****s****L]LLMMM"&. 	**66tyAA2U,^klllj$((:::"" 	. 	.AAF,-DJqM.))	. 	.    c                     dS )NT r!   s    r&   is_second_orderzAdahessian.is_second_order6   s    tr'   c                 $    d | j         D             S )zH
        Gets all parameters in all param_groups with gradients
        c              3   >   K   | ]}|d          D ]}|j         	|V  dS )r"   N)requires_grad).0groupr$   s      r&   	<genexpr>z(Adahessian.get_params.<locals>.<genexpr>?   s:      ]]e%/]]QQ_]]]]]]]]r'   )param_groupsr*   s    r&   r   zAdahessian.get_params:   s    
 ^]t0]]]]r'   c                     |                                  D ]T}t          |j        t                    s8| j        |         d         | j        z  dk    r|j                                         UdS )z;
        Zeros out the accumalated hessian traces.
        r   r   N)r   
isinstancer   floatr    r   zero_)r!   r$   s     r&   zero_hessianzAdahessian.zero_hessianA   sj    
 "" 	 	Aafe,, A~1NQUQa1aef1f1f	 	r'   c           
          g }t          d                                            D ]Q} j        |         d          j        z  dk    r|                    |            j        |         dxx         dz  cc<   Rt          |          dk    rdS  j        j        |d         j        k    r<t          j	        |d         j                  
                     j                   _        d |D             }t           j                  D ]s} fd|D             }t          j                            |||d| j        dz
  k     	          }t!          |||          D ]!\  }}}|xj        ||z   j        z  z  c_        "tdS )
z}
        Computes the Hutchinson approximation of the hessian trace and accumulates it for each trainable parameter.
        c                     | j         d uS Ngrad)r$   s    r&   <lambda>z(Adahessian.set_hessian.<locals>.<lambda>Q   s    !&"4 r'   r   r   r
   Nc                     g | ]	}|j         
S r)   r;   )r/   r$   s     r&   
<listcomp>z*Adahessian.set_hessian.<locals>.<listcomp>\   s    (((A(((r'   c           	          g | ]=}t          j        d d|                                j        |j                  dz  dz
  >S )r      )r   deviceg       @r	   )r   randintsizer   rB   )r/   r$   r!   s     r&   r?   z*Adahessian.set_hessian.<locals>.<listcomp>`   sI    wwwkl%-1affhh$.QRQYZZZ]``cffwwwr'   T)grad_outputsonly_inputsretain_graph)filterr   r    r   appendlenr   rB   r   r   r   r   ranger   autogradr<   zipr   )	r!   r"   r$   gradsizsh_zsh_zzs	   `        r&   set_hessianzAdahessian.set_hessianJ   s    44doo6G6GHH 	/ 	/Az!}^,t/??1DDa   JqM.)))Q.))))v;;!F> F1I$444"_VAY-=>>JJ49UUDN(((((t~&& 	3 	3AwwwwpvwwwB>&&vBDqSWSadeSeOe ' g gD r622 3 3	Q#'DN223	3 	3r'   Nc                    d}|
 |            }|                                   |                                  | j        D ](}|d         D ]}|j        |j        | j        rw|                                dk    r_t          j        |j                  	                    ddgd          
                    |j                                                  |_        |                    d|d	         |d
         z  z
             | j        |         }t          |          dk    r3d|d<   t          j        |          |d<   t          j        |          |d<   |d         |d         }}|d         \  }}	|dxx         dz  cc<   |                    |                              |j        d|z
             |                    |	                              |j        |j        d|	z
             d||d         z  z
  }
d|	|d         z  z
  }|d         }||z                      |dz                                |d                   }|d	         |
z  }|                    |||            *|S )z
        Performs a single optimization step.
        Arguments:
            closure (callable, optional) -- a closure that reevaluates the model and returns the loss (default: None)
        Nr"      rA      T)dimkeepdimr
   r   r   r   stepexp_avgexp_hessian_diag_sqr   )alpha)valuer   r   )r7   rT   r2   r<   r   r   rX   r   absmean	expand_asclonemul_r    rJ   
zeros_likeadd_addcmul_pow_addcdiv_)r!   closurelossr0   r$   r    r[   r\   beta1beta2bias_correction1bias_correction2kdenom	step_sizes                  r&   rZ   zAdahessian.stepf   s    799D& %	= %	=E8_ $= $=6>QV^' hAEEGGqLL"Yqv..33A3MMWWXYX^__eeggAF q5;~)>>>???
1 u::??$%E&M','7':':E)$383CA3F3FE/0/4Y/?G\A],$W~uf" U##((q5y(AAA#((//88qSXy8YYY#$uf'=#= #$uf'=#= /*,/??EEa!eLLQQRWX]R^__ "$K*::	

7E)
<<<<I$=L r'   )r   r   r   r   r	   r
   r
   Fr:   )__name__
__module____qualname____doc__r   propertyr+   r   r7   r   no_gradrT   rZ   __classcell__)r%   s   @r&   r   r   	   s           SVPU. . . . . .8   X^ ^ ^   U]__3 3 _36 U]__5 5 5 _5 5 5 5 5r'   r   )ru   r   optim	Optimizerr   r)   r'   r&   <module>r{      s]    
 S S S S S& S S S S Sr'   