
import math

import tensorflow as tf
from packaging.version import parse


try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    import keras

    if parse(keras.__version__).major > 2:
        raise ValueError(
            "Your currently installed version of Keras is Keras 3, but this is not yet supported in "
            "Transformers. Please install the backwards-compatible tf-keras package with "
            "`pip install tf-keras`."
        )


def _gelu(x):
    """
    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see
    https://arxiv.org/abs/1606.08415
    """
    x = tf.convert_to_tensor(x)
    cdf = 0.5 * (1.0 + tf.math.erf(x / tf.cast(tf.sqrt(2.0), x.dtype)))

    return x * cdf
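

# Note (illustrative, not from the original file): `_gelu` above is the "exact" GELU,
# 0.5 * x * (1 + erf(x / sqrt(2))), while the tanh expression quoted in its docstring is the
# approximation implemented by `_gelu_new` and `gelu_fast` below. A quick sketch of how closely
# they agree, assuming eager TensorFlow:
#
#     x = tf.constant([-1.0, 0.0, 1.0])
#     tf.debugging.assert_near(_gelu(x), _gelu_new(x), atol=1e-3)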


def _gelu_new(x):
    """
    Gaussian Error Linear Unit. This is a smoother version of the GELU. Original paper: https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to perform activation

    Returns:
        `x` with the GELU activation applied.
    """
    x = tf.convert_to_tensor(x)
    pi = tf.cast(math.pi, x.dtype)
    coeff = tf.cast(0.044715, x.dtype)
    x = 0.5 * x * (1.0 + tf.tanh(tf.sqrt(2.0 / pi) * (x + coeff * tf.pow(x, 3))))

    return x


def mish(x):
    x = tf.convert_to_tensor(x)

    return x * tf.tanh(tf.math.softplus(x))


def gelu_fast(x):
    x = tf.convert_to_tensor(x)
    coeff1 = tf.cast(0.044715, x.dtype)
    coeff2 = tf.cast(0.7978845608, x.dtype)

    return 0.5 * x * (1.0 + tf.tanh(x * coeff2 * (1.0 + coeff1 * x * x)))


def quick_gelu(x):
    x = tf.convert_to_tensor(x)
    coeff = tf.cast(1.702, x.dtype)

    return x * tf.math.sigmoid(coeff * x)


def gelu_10(x):
    """
    Clip the range of possible GeLU outputs between [-10, 10]. This is especially useful for quantization purposes, as
    it allows mapping two negative values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see
    https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to perform activation

    Returns:
        `x` with the GeLU activation applied, clipped to [-10, 10].
    """
    return tf.clip_by_value(_gelu(x), -10, 10)


def glu(x, axis=-1):
    """
    Gated Linear Unit. Implementation as defined in the original paper (see https://arxiv.org/abs/1612.08083), where
    the input `x` is split in two halves across a dimension (`axis`), A and B, returning A * sigmoid(B).

    Args:
        `x`: float Tensor to perform activation
        `axis`: dimension across which `x` is split in half

    Returns:
        `x` with the GLU activation applied (with its size halved across the dimension `axis`).
    """
    a, b = tf.split(x, 2, axis=axis)

    return a * tf.math.sigmoid(b)
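

# Illustrative sketch (not from the original file): `glu` halves the size of `x` along `axis`,
# since the second half only acts as the gate. For example, assuming eager TensorFlow:
#
#     x = tf.random.normal((4, 8))
#     glu(x, axis=-1).shape  # TensorShape([4, 4])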


if parse(tf.version.VERSION) >= parse("2.4"):

    def approximate_gelu_wrap(x):
        return keras.activations.gelu(x, approximate=True)

    gelu = keras.activations.gelu
    gelu_new = approximate_gelu_wrap
else:
    gelu = _gelu
    gelu_new = _gelu_new


ACT2FN = {
    "gelu": gelu,
    "gelu_10": gelu_10,
    "gelu_fast": gelu_fast,
    "gelu_new": gelu_new,
    "glu": glu,
    "mish": mish,
    "quick_gelu": quick_gelu,
    "relu": keras.activations.relu,
    "sigmoid": keras.activations.sigmoid,
    "silu": keras.activations.swish,
    "swish": keras.activations.swish,
    "tanh": keras.activations.tanh,
}


def get_tf_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}")
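

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the original module): look up an activation
    # by its string name via the ACT2FN mapping and apply it to a tensor in eager mode.
    act = get_tf_activation("gelu")
    print(act(tf.constant([-1.0, 0.0, 1.0])).numpy())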