
    Χgs:                     (   d dl Z d dlmZmZmZmZ d dlZd dlmZmZ d dl	m
ZmZ d dlmZ ddlmZ ddlmZ g d	Z G d
 de          Z G d de          Zeeee         ef         Z G d de          Z G d de          Z G d de          ZdS )    N)ListOptionalTupleUnion)SizeTensor)
functionalinit)	Parameter   )CrossMapLRN2d)Module)LocalResponseNormr   	LayerNorm	GroupNormRMSNormc                        e Zd ZU dZg dZeed<   eed<   eed<   eed<   	 ddedededed
df
 fdZde	d
e	fdZ
d Z xZS )r   a  Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    )sizealphabetakr   r   r   r   -C6?      ?      ?returnNc                     t                                                       || _        || _        || _        || _        d S Nsuper__init__r   r   r   r   selfr   r   r   r   	__class__s        Z/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/nn/modules/normalization.pyr    zLocalResponseNorm.__init__5   ;     		
	    inputc                 Z    t          j        || j        | j        | j        | j                  S r   )Flocal_response_normr   r   r   r   r"   r'   s     r$   forwardzLocalResponseNorm.forward>   s#    $UDItz49dfUUUr&   c                 &     dj         di | j        S Nz){size}, alpha={alpha}, beta={beta}, k={k} format__dict__r"   s    r$   
extra_reprzLocalResponseNorm.extra_reprA       A:ARRDMRRRr&   )r   r   r   )__name__
__module____qualname____doc____constants__int__annotations__floatr    r   r,   r4   __classcell__r#   s   @r$   r   r      s          : 322M
IIILLL
KKKHHH NQ  %49EJ	     VV V V V V VS S S S S S Sr&   r   c                   ~     e Zd ZU eed<   eed<   eed<   eed<   	 ddededededd	f
 fd
ZdedefdZde	fdZ
 xZS )r   r   r   r   r   r   r   r   r   Nc                     t                                                       || _        || _        || _        || _        d S r   r   r!   s        r$   r    zCrossMapLRN2d.__init__K   r%   r&   r'   c                 Z    t          j        || j        | j        | j        | j                  S r   )_cross_map_lrn2dapplyr   r   r   r   r+   s     r$   r,   zCrossMapLRN2d.forwardT   s#    %eTY
DItvVVVr&   c                 &     dj         di | j        S r.   r0   r3   s    r$   r4   zCrossMapLRN2d.extra_reprW   r5   r&   )r   r   r   )r6   r7   r8   r;   r<   r=   r    r   r,   strr4   r>   r?   s   @r$   r   r   E   s         
IIILLL
KKKHHH NO  %49EJ	     WV W W W W WSC S S S S S S S Sr&   r   c                        e Zd ZU dZg dZeedf         ed<   eed<   e	ed<   	 	 	 	 	 dde
dede	d
e	dd	f
 fdZddZdedefdZdefdZ xZS )r   a  Applies Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which applies
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias:   the learnable bias of the module of shape
                :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
                The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %

    normalized_shapeepselementwise_affine.rI   rJ   rK   h㈵>TNbiasr   c                 6   ||d}t                                                       t          |t          j                  r|f}t          |          | _        || _        || _        | j        rlt          t          j        | j        fi |          | _        |r*t          t          j        | j        fi |          | _        nC|                     dd            n,|                     dd            |                     dd            |                                  d S )NdevicedtyperM   weight)r   r    
isinstancenumbersIntegraltuplerI   rJ   rK   r   torchemptyrR   rM   register_parameterreset_parameters)	r"   rI   rJ   rK   rM   rP   rQ   factory_kwargsr#   s	           r$   r    zLayerNorm.__init__   s/    %+U;;&(899 	3 02 %&6 7 7"4" 	2#D1DD^DD DK  6%K 5HHHH 		 ''5555##Hd333##FD111r&   c                     | j         r;t          j        | j                   | j        t          j        | j                   d S d S d S r   )rK   r
   ones_rR   rM   zeros_r3   s    r$   rZ   zLayerNorm.reset_parameters   sO    " 	'Jt{###y$DI&&&&&	' 	'$$r&   r'   c                 Z    t          j        || j        | j        | j        | j                  S r   )r)   
layer_normrI   rR   rM   rJ   r+   s     r$   r,   zLayerNorm.forward   s*    |4($+ty$(
 
 	
r&   c                 &     dj         di | j        S )NF{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}r/   r0   r3   s    r$   r4   zLayerNorm.extra_repr   s1    = 66<N N?C}N N	
r&   )rL   TTNNr   N)r6   r7   r8   r9   r:   r   r;   r<   r=   bool_shape_tr    rZ   r   r,   rF   r4   r>   r?   s   @r$   r   r   ^   s        K KZ FEEMCHo%%%	JJJ
 #'   "    !	 
   
           B' ' ' '
V 
 
 
 
 


C 
 
 
 
 
 
 
 
r&   r   c                        e Zd ZU dZg dZeed<   eed<   eed<   eed<   	 	 	 	 ddedededed
d	f
 fdZ	ddZ
ded
efdZd
efdZ xZS )r   a  Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over the each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    )
num_groupsnum_channelsrJ   affinerg   rh   rJ   ri   rL   TNr   c                    ||d}t                                                       ||z  dk    rt          d          || _        || _        || _        || _        | j        rIt          t          j	        |fi |          | _
        t          t          j	        |fi |          | _        n,|                     dd            |                     dd            |                                  d S )NrO   r   z,num_channels must be divisible by num_groupsrR   rM   )r   r    
ValueErrorrg   rh   rJ   ri   r   rW   rX   rR   rM   rY   rZ   )	r"   rg   rh   rJ   ri   rP   rQ   r[   r#   s	           r$   r    zGroupNorm.__init__  s     %+U;;*$))KLLL$(; 	2#EK$O$O$O$OPPDK!%+l"M"Mn"M"MNNDII##Hd333##FD111r&   c                 |    | j         r4t          j        | j                   t          j        | j                   d S d S r   )ri   r
   r]   rR   r^   rM   r3   s    r$   rZ   zGroupNorm.reset_parameters3  s@    ; 	#Jt{###K	"""""	# 	#r&   r'   c                 Z    t          j        || j        | j        | j        | j                  S r   )r)   
group_normrg   rR   rM   rJ   r+   s     r$   r,   zGroupNorm.forward8  s"    |E4?DKDHUUUr&   c                 &     dj         di | j        S )Nz8{num_groups}, {num_channels}, eps={eps}, affine={affine}r/   r0   r3   s    r$   r4   zGroupNorm.extra_repr;  s)    SLS 
 
m
 
 	
r&   )rL   TNNrc   )r6   r7   r8   r9   r:   r;   r<   r=   rd   r    rZ   r   r,   rF   r4   r>   r?   s   @r$   r   r      s        + +Z DCCMOOO	JJJLLL        	 
   
           6# # # #
VV V V V V V
C 
 
 
 
 
 
 
 
r&   r   c            	            e Zd ZU dZg dZeedf         ed<   ee	         ed<   e
ed<   	 	 	 	 ddedee	         de
d	df fd
ZddZdej        d	ej        fdZd	efdZ xZS )r   a}  Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>`__

    .. math::
        y = \frac{x}{\sqrt{\mathrm{RMS}[x] + \epsilon}} * \gamma

    The root mean squared norm is taken over the last ``D`` dimensions, where ``D``
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the rms norm is computed over
    the last 2 dimensions of the input.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: :func:`torch.finfo(x.dtype).eps`
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> rms_norm = nn.RMSNorm([2, 3])
        >>> input = torch.randn(2, 2, 3)
        >>> rms_norm(input)

    rH   .rI   rJ   rK   NTr   c                    ||d}t                                                       t          |t          j                  r|f}t          |          | _        || _        || _        | j        r*t          t          j        | j        fi |          | _        n|                     dd            |                                  d S )NrO   rR   )r   r    rS   rT   rU   rV   rI   rJ   rK   r   rW   rX   rR   rY   rZ   )r"   rI   rJ   rK   rP   rQ   r[   r#   s          r$   r    zRMSNorm.__init__n  s     %+U;;&(899 	3 02 %&6 7 7"4" 	4#D1DD^DD DKK ##Hd333r&   c                 J    | j         rt          j        | j                   dS dS )zS
        Resets parameters based on their initialization used in __init__.
        N)rK   r
   r]   rR   r3   s    r$   rZ   zRMSNorm.reset_parameters  s1     " 	$Jt{#####	$ 	$r&   xc                 N    t          j        || j        | j        | j                  S )z$
        Runs forward pass.
        )r)   rms_normrI   rR   rJ   )r"   rs   s     r$   r,   zRMSNorm.forward  s!     z!T2DKJJJr&   c                 &     dj         di | j        S )z5
        Extra information about the module.
        rb   r/   r0   r3   s    r$   r4   zRMSNorm.extra_repr  s1    
= 66<N N?C}N N	
r&   )NTNNrc   )r6   r7   r8   r9   r:   r   r;   r<   r   r=   rd   re   r    rZ   rW   r   r,   rF   r4   r>   r?   s   @r$   r   r   A  s         & &N FEEMCHo%%%	%
  $#'   "  e_  !	  
           0$ $ $ $K K%, K K K K
C 
 
 
 
 
 
 
 
r&   r   )rT   typingr   r   r   r   rW   r   r   torch.nnr	   r)   r
   torch.nn.parameterr   
_functionsr   rC   moduler   __all__r   r;   re   r   r   r   r/   r&   r$   <module>r}      s    / / / / / / / / / / / /          * * * * * * * * ( ( ( ( ( ( 9 9 9 9 9 9       V
U
U1S 1S 1S 1S 1S 1S 1S 1ShS S S S SF S S S, d3i%&C
 C
 C
 C
 C
 C
 C
 C
LZ
 Z
 Z
 Z
 Z
 Z
 Z
 Z
zY
 Y
 Y
 Y
 Y
f Y
 Y
 Y
 Y
 Y
r&   