
import math
from collections import OrderedDict

import torch
from packaging import version
from torch import Tensor, nn

from .utils import logging


logger = logging.get_logger(__name__)


class PytorchGELUTanh(nn.Module):
    """
    A fast C implementation of the tanh approximation of the GeLU activation function. See
    https://arxiv.org/abs/1606.08415.

    This implementation is equivalent to NewGELU and FastGELU but much faster. However, it is not an exact numerical
    match due to rounding errors.
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.12.0"):
            raise ImportError(
                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
                "PytorchGELUTanh. Please upgrade torch."
            )

    def forward(self, input: Tensor) -> Tensor:
        return nn.functional.gelu(input, approximate="tanh")


class NewGELUActivation(nn.Module):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * torch.pow(input, 3.0))))


class GELUActivation(nn.Module):
    """
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). This is now written in C
    in nn.functional. Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def __init__(self, use_gelu_python: bool = False):
        super().__init__()
        if use_gelu_python:
            self.act = self._gelu_python
        else:
            self.act = nn.functional.gelu

    def _gelu_python(self, input: Tensor) -> Tensor:
        return input * 0.5 * (1.0 + torch.erf(input / math.sqrt(2.0)))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class FastGELUActivation(nn.Module):
    """
    Applies GELU approximation that is slower than QuickGELU but more accurate. See:
    https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(input * 0.7978845608 * (1.0 + 0.044715 * input * input)))


class QuickGELUActivation(nn.Module):
    """
    Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return input * torch.sigmoid(1.702 * input)


class ClippedGELUActivation(nn.Module):
    """
    Clip the range of possible GeLU outputs between [min, max]. This is especially useful for quantization purposes,
    as it allows mapping negative values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602.

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). See
    https://arxiv.org/abs/1606.08415
    """

    def __init__(self, min: float, max: float):
        if min > max:
            raise ValueError(f"min should be < max (got min: {min}, max: {max})")

        super().__init__()
        self.min = min
        self.max = max

    def forward(self, x: Tensor) -> Tensor:
        return torch.clip(gelu(x), self.min, self.max)


class AccurateGELUActivation(nn.Module):
    """
    Applies GELU approximation that is faster than default and more accurate than QuickGELU. See:
    https://github.com/hendrycks/GELUs

    Implemented along with MEGA (Moving Average Equipped Gated Attention)
    """

    def __init__(self):
        super().__init__()
        self.precomputed_constant = math.sqrt(2 / math.pi)

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3))))


class MishActivation(nn.Module):
    """
    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra, https://arxiv.org/abs/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.9.0"):
            self.act = self._mish_python
        else:
            self.act = nn.functional.mish

    def _mish_python(self, input: Tensor) -> Tensor:
        return input * torch.tanh(nn.functional.softplus(input))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class LinearActivation(nn.Module):
    """
    Applies the linear activation function, i.e. forwarding input directly to output.
    """

    def forward(self, input: Tensor) -> Tensor:
        return input


class LaplaceActivation(nn.Module):
    """
    Applies elementwise activation based on the Laplace function, introduced in MEGA as an attention activation. See
    https://arxiv.org/abs/2209.10655

    Inspired by squared relu, but with bounded range and gradient for better stability.
    """

    def forward(self, input, mu=0.707107, sigma=0.282095):
        input = (input - mu).div(sigma * math.sqrt(2.0))
        return 0.5 * (1.0 + torch.erf(input))


class ReLUSquaredActivation(nn.Module):
    """
    Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2
    """

    def forward(self, input):
        relu_applied = nn.functional.relu(input)
        squared = torch.square(relu_applied)
        return squared


class ClassInstantier(OrderedDict):
    # Instantiates the stored class on lookup, optionally with the stored constructor kwargs.
    def __getitem__(self, key):
        content = super().__getitem__(key)
        cls, kwargs = content if isinstance(content, tuple) else (content, {})
        return cls(**kwargs)


ACT2CLS = {
    "gelu": GELUActivation,
    "gelu_10": (ClippedGELUActivation, {"min": -10, "max": 10}),
    "gelu_fast": FastGELUActivation,
    "gelu_new": NewGELUActivation,
    "gelu_python": (GELUActivation, {"use_gelu_python": True}),
    "gelu_pytorch_tanh": PytorchGELUTanh,
    "gelu_accurate": AccurateGELUActivation,
    "laplace": LaplaceActivation,
    "leaky_relu": nn.LeakyReLU,
    "linear": LinearActivation,
    "mish": MishActivation,
    "quick_gelu": QuickGELUActivation,
    "relu": nn.ReLU,
    "relu2": ReLUSquaredActivation,
    "relu6": nn.ReLU6,
    "sigmoid": nn.Sigmoid,
    "silu": nn.SiLU,
    "swish": nn.SiLU,
    "tanh": nn.Tanh,
}
ACT2FN = ClassInstantier(ACT2CLS)


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}")


# Module-level instances, so imports such as `from .activations import gelu` keep working.
gelu_python = get_activation("gelu_python")
gelu_new = get_activation("gelu_new")
gelu = get_activation("gelu")
gelu_fast = get_activation("gelu_fast")
quick_gelu = get_activation("quick_gelu")
silu = get_activation("silu")
mish = get_activation("mish")
linear_act = get_activation("linear")
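# Usage sketch (illustrative only; the tensor shapes and names below are arbitrary examples,
# not part of the module's API): `get_activation` resolves a string name through ACT2FN,
# whose ClassInstantier instantiates the mapped nn.Module on lookup.
#
#     act = get_activation("gelu_new")   # returns a NewGELUActivation() instance
#     x = torch.randn(2, 3)
#     y = act(x)                         # same shape as x, GELU-transformed
#
#     ACT2FN["gelu_10"]                  # ClippedGELUActivation(min=-10, max=10)
#     get_activation("not_a_name")       # raises KeyError listing the valid names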