
""" RMSProp modified to behave like Tensorflow impl

Originally cut & paste from PyTorch RMSProp
https://github.com/pytorch/pytorch/blob/063946d2b3f3f1e953a2a3b54e0b34f1393de295/torch/optim/rmsprop.py
Licensed under BSD-Clause 3 (ish), https://github.com/pytorch/pytorch/blob/master/LICENSE

Modifications Copyright 2021 Ross Wightman
"""
import torch
from torch.optim import Optimizer


class RMSpropTF(Optimizer):
    """Implements RMSprop algorithm (TensorFlow style epsilon)

    NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt
    and a few other modifications to closer match Tensorflow for matching hyper-params.

    Noteworthy changes include:
    1. Epsilon applied inside square-root
    2. square_avg initialized to ones
    3. LR scaling of update accumulated in momentum buffer
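
    For example, where ``torch.optim.RMSprop`` computes the denominator roughly as
    ``square_avg.sqrt().add_(eps)``, this implementation adds eps before taking the
    square root, i.e. (illustrative sketch only)::

        avg = square_avg.add(eps).sqrt_()  # eps inside the sqrt, as in Tensorflow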

    Proposed by G. Hinton in his
    `course <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_.

    The centered version first appears in `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing (decay) constant (default: 0.9)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        centered (bool, optional): if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        decoupled_decay (bool, optional): decoupled weight decay as per https://arxiv.org/abs/1711.05101
        lr_in_momentum (bool, optional): learning rate scaling is included in the momentum buffer
            update as per defaults in Tensorflow
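
    Example (illustrative usage sketch, not part of the original module; the model
    and data below are placeholders)::

        import torch
        from timm.optim.rmsprop_tf import RMSpropTF

        model = torch.nn.Linear(10, 2)
        optimizer = RMSpropTF(model.parameters(), lr=1e-2, alpha=0.9, eps=1e-10, momentum=0.9)

        inputs = torch.randn(8, 10)
        targets = torch.randint(0, 2, (8,))
        loss = torch.nn.functional.cross_entropy(model(inputs), targets)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()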

    """

    def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10, weight_decay=0, momentum=0., centered=False,
                 decoupled_decay=False, lr_in_momentum=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= momentum:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if not 0.0 <= alpha:
            raise ValueError("Invalid alpha value: {}".format(alpha))

        defaults = dict(
            lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay,
            decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum)
        super(RMSpropTF, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RMSpropTF, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('momentum', 0)
            group.setdefault('centered', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('RMSprop does not support sparse gradients')
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['square_avg'] = torch.ones_like(p)  # PyTorch inits to zero
                    if group['momentum'] > 0:
                        state['momentum_buffer'] = torch.zeros_like(p)
                    if group['centered']:
                        state['grad_avg'] = torch.zeros_like(p)

                square_avg = state['square_avg']
                one_minus_alpha = 1. - group['alpha']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    if group['decoupled_decay']:
                        p.mul_(1. - group['lr'] * group['weight_decay'])
                    else:
                        grad = grad.add(p, alpha=group['weight_decay'])

                # Tensorflow order of ops for updating squared avg
                square_avg.add_(grad.pow(2) - square_avg, alpha=one_minus_alpha)
                # square_avg.mul_(alpha).addcmul_(grad, grad, value=1 - alpha)  # PyTorch original

                if group['centered']:
                    grad_avg = state['grad_avg']
                    grad_avg.add_(grad - grad_avg, alpha=one_minus_alpha)
                    avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).add(group['eps']).sqrt_()  # eps in sqrt
                    # grad_avg.mul_(alpha).add_(grad, alpha=1 - alpha)  # PyTorch original
                else:
                    avg = square_avg.add(group['eps']).sqrt_()  # eps moved in sqrt

                if group['momentum'] > 0:
                    buf = state['momentum_buffer']
                    # Tensorflow accumulates the LR scaling in the momentum buffer
                    if group['lr_in_momentum']:
                        buf.mul_(group['momentum']).addcdiv_(grad, avg, value=group['lr'])
                        p.add_(-buf)
                    else:
                        # PyTorch scales the param update by LR
                        buf.mul_(group['momentum']).addcdiv_(grad, avg)
                        p.add_(buf, alpha=-group['lr'])
                else:
                    p.addcdiv_(grad, avg, value=-group['lr'])

        return loss