""" Nvidia NovoGrad Optimizer.
Original impl by Nvidia from Jasper example:
    - https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechRecognition/Jasper
Paper: `Stochastic Gradient Methods with Layer-wise Adaptive Moments for Training of Deep Networks`
    - https://arxiv.org/abs/1905.11286
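
Sketch of the per-layer NovoGrad update as implemented below (``v`` is a
scalar second moment per parameter tensor, ``d`` is the weight decay; with
``grad_averaging`` the bracketed term is additionally scaled by ``1 - beta1``):

    v_t = beta2 * v_{t-1} + (1 - beta2) * ||g_t||^2
    m_t = beta1 * m_{t-1} + [ g_t / (sqrt(v_t) + eps) + d * w_t ]
    w_{t+1} = w_t - lr * m_t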
"""
import torch
from torch.optim.optimizer import Optimizer


class NvNovoGrad(Optimizer):
    """
    Implements Novograd algorithm.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.95, 0.98))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        grad_averaging (bool, optional): multiply the normalized gradient by
            (1 - beta1) before accumulating it into the first moment
            (default: False)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)
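
    Example:
        A minimal usage sketch; ``model``, ``loss_fn``, ``input`` and
        ``target`` are assumed to be defined elsewhere::

            optimizer = NvNovoGrad(model.parameters(), lr=1e-3,
                                   betas=(0.95, 0.98), weight_decay=1e-4)
            optimizer.zero_grad()
            loss_fn(model(input), target).backward()
            optimizer.step()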
    MbP?gffffff?g\(\?:0yE>r   Fc                    d|k    s"t          d                    |                    d|k    s"t          d                    |                    d|d         cxk    rdk     s*n t          d                    |d                             d|d         cxk    rdk     s*n t          d                    |d                             t          ||||||	          }t          t          |                               ||           d S )
Ng        zInvalid learning rate: {}zInvalid epsilon value: {}r   g      ?z%Invalid beta parameter at index 0: {}   z%Invalid beta parameter at index 1: {})lrbetasepsweight_decaygrad_averagingamsgrad)
ValueErrorformatdictsuperr   __init__)
selfparamsr   r   r   r   r   r   defaults	__class__s
            Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/optim/nvnovograd.pyr   zNvNovoGrad.__init__    s$   byy8??CCDDDczz8??DDEEEeAh$$$$$$$$DKKERSHUUVVVeAh$$$$$$$$DKKERSHUUVVV2U%1'5 ') ) )
 	j$((:::::    c                     t          t          |                               |           | j        D ]}|                    dd           d S )Nr   F)r   r   __setstate__param_groups
setdefault)r   stategroupr   s      r   r   zNvNovoGrad.__setstate__1   sV    j$,,U333& 	/ 	/EY....	/ 	/r   Nc                    d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}|d         D ]}|j        |j        }|j        rt          d          |d         }| j        |         }t          |          dk    rd|d<   t          j        |          |d<   t          j	        g           
                    |d         j                  |d<   |r5t          j	        g           
                    |d         j                  |d	<   |d         |d         }	}|r|d	         }
|d
         \  }}|dxx         dz  cc<   t          j        t          j        |d                    }|	dk    r|	                    |           n-|	                    |                              |d|z
             |rEt          j        |
|	|
           |
                                                    |d                   }n-|	                                                    |d                   }|                    |           |d         dk    r|                    ||d                    |d         r|                    d|z
             |                    |                              |           |                    ||d                     |S )zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Sparse gradients are not supported.')
                amsgrad = group['amsgrad']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p)
                    # Scalar exponential moving average of the squared gradient norm
                    state['exp_avg_sq'] = torch.zeros([]).to(state['exp_avg'].device)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. norms
                        state['max_exp_avg_sq'] = torch.zeros([]).to(state['exp_avg'].device)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Layer-wise second moment: squared L2 norm of the gradient
                norm = torch.sum(torch.pow(grad, 2))

                if exp_avg_sq == 0:
                    exp_avg_sq.copy_(norm)
                else:
                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)

                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avgs so far
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing the gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                # Normalize the gradient, apply weight decay, then accumulate
                # into the first moment
                grad.div_(denom)
                if group['weight_decay'] != 0:
                    grad.add_(p, alpha=group['weight_decay'])
                if group['grad_averaging']:
                    grad.mul_(1 - beta1)
                exp_avg.mul_(beta1).add_(grad)

                p.add_(exp_avg, alpha=-group['lr'])

        return loss