
    ΧgA4                        d Z ddlmZmZ ddlmZ ddlmZ ddlZddl	m
c mZ ddlmZmZmZmZ ddlmZ ddlmZmZmZmZ g d	Zej                            e           ej                            e           ej                            e           ej                            e            G d
 de          Z G d dej                  ZdefdZdefdZ dS )zCDefines bias subclasses that work with scaled_dot_product_attention    )autoIntEnum)Optional)warnN)can_use_efficient_attentioncan_use_flash_attentionis_flash_attention_available
SDPAParams)_raise_kernel_warnings)_calculate_scale_input_requires_grad_postprocess_flash_output_validate_sdpa_input)causal_upper_leftcausal_lower_rightCausalVariant
CausalBiasc                   :    e Zd ZdZ e            Z e            ZdS )r   a#  
    Enum for causal variants used in attention mechanisms.

    Defines two types of causal biases:

    `UPPER_LEFT`: Represents upper-left triangular bias for standard causal attention.
    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        torch.tril(torch.ones(size, dtype=torch.bool))

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 0, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 0]]


    `LOWER_RIGHT`: Represents lower-right triangular bias, the include values are aligned to the lower
    right corner of the matrix.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        diagonal_offset = size[1] - size[0]
        torch.tril(
            torch.ones(size, dtype=torch.bool),
            diagonal=diagonal_offset,
        )

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 1, 0, 0],
         [1, 1, 1, 0],
         [1, 1, 1, 1]]

    Note that these variants are equivalent to each other when the sequence lengths of the query and key/value
    tensors are equal since the triangular matrix is square.

    .. warning:: This enum is a prototype and subject to change.
    N)__name__
__module____qualname____doc__r   
UPPER_LEFTLOWER_RIGHT     S/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/nn/attention/bias.pyr   r   !   s1        . .` J$&&KKKr   r   c                   8   e Zd ZdZdededefdZdej        dej	        fdZ
dej        dej	        fd	Zddeej                 dej	        fdZe	 	 	 	 ddej	        dej	        dej	        dd dededee         dedej	        fd            Zedd            Zd Zd
S )r   a  
    A bias representing causal attention patterns. For an overview of the bias structure, see the :class:`CausalVariant` enum.

    This class is used for defining causal (triangular) attention biases. For construing the bias, there exist
    two factory functions: :func:`causal_upper_left` and :func:`causal_lower_right`.

    Example:

    .. code-block:: python

        from torch.nn.attention.bias import causal_lower_right

        bsz, num_heads, seqlen_q, seqlen_kv, head_dim = 32, 8, 4, 12, 8

        # Create a lower-right causal bias
        attn_bias = causal_lower_right(seqlen_q, seqlen_kv)

        q = torch.randn(bsz, num_heads, seqlen_q, head_dim, device="cuda", dtype=torch.float16)
        k = torch.randn(bsz, num_heads, seqlen_kv, head_dim, device="cuda", dtype=torch.float16)
        v = torch.randn(bsz, num_heads, seqlen_kv, head_dim, device="cuda", dtype=torch.float16)

        out = F.scaled_dot_product_attention(q, k, v, attn_bias)

    .. warning:: This class is a prototype and subject to change.
    variant	seq_len_q
seq_len_kvc                     t          |t                    sJ || _        || _        || _        ||k    r!|t          j        k    rt          d           dS dS dS )a  
        Initializes the CausalBias instance with a specified variant and sequence lengths.

        Args:
            variant (CausalVariant): The type of causal bias to use (either UPPER_LEFT or LOWER_RIGHT).
            seq_len_q (int): The sequence length of the query tensor.
            seq_len_kv (int): The sequence length of the key/value tensor.

        Raises a warning if the LOWER_RIGHT variant is used with seq_len_q > seq_len_kv, as it may produce NaNs.
        zTLower right causal bias will produce NaNs in the output when seq_len_q > seq_len_kv!N)
isinstancer   r   r    r!   r   r   )selfr   r    r!   s       r   __init__zCausalBias.__init__q   st     '=11111"$z!!g1J&J&Jf     "!&J&Jr   devicereturnc                 ~    t          j        t          j        | j        | j        |t           j                            S )zUpper left causal biasr&   dtype)torchtrilonesr    r!   boolr$   r&   s     r   _upper_leftzCausalBias._upper_left   s2    zJt~tvUZXXX
 
 	
r   c                     | j         | j        z
  }t          j        t          j        | j        | j         |t          j                  |          S )zLower right causal biasr)   )diagonal)r!   r    r+   r,   r-   r.   )r$   r&   diagonal_offsets      r   _lower_rightzCausalBias._lower_right   sQ    /DN:zJej   %	
 
 
 	
r   Nc                     |t          j        d          }| j        t          j        k    r|                     |          S | j        t          j        k    r|                     |          S dS )a  
        Materializes the causal bias into a tensor form.

        Depending on the variant, this method generates either an upper-left or lower-right
        triangular matrix to represent the causal bias.

        Args:
            device (Optional[torch.device]): The device on which to create the tensor. Defaults to CPU.

        Returns:
            torch.Tensor: The materialized bias tensor.
        Ncpu)r+   r&   r   r   r   r0   r   r4   r/   s     r   _materializezCausalBias._materialize   sh     >\%((F<=333##F+++\]666$$V,,, 76r           Fquerykeyvalue	attn_mask	dropout_p	is_causalscale
enable_gqac                    |rt          d          |j        |j        k    s|j        t          j        k    rt          j        | ||d|d||          S |j        t          j        k    r~t          | ||d|||           t          | ||d|||          }t          |          rD|                     d          dz  dk    }	|                     d          }
t          |
|          }|	rt          j        j                            | dd|                     d          dz  z
  f          } t          j        j                            |dd|                    d          dz  z
  f          }t          j        j                            |dd|                    d          dz  z
  f          }t          j        j                            | |||dd|	          d         }t+          ||
          S t-          |          rd}t/          | ||          rd}t          j        j                            |                     d
d          |                    d
d          |                    d
d          ddddd|t5          |j                  ||d          d                             d
d          S t7          |           t          j        | |||                    | j                  |d||          S t          d|j                   )a8  
        Handles the logic for computing attention with the specified causal bias.

        Args:
            query (Tensor): Query tensor; shape :math:`(N, ..., L, E)`.
            key (Tensor): Key tensor; shape :math:`(N, ..., S, E)`.
            value (Tensor): Value tensor; shape :math:`(N, ..., S, Ev)`.
            attn_mask (CausalBias): The type of causal attention to apply.
                A boolean mask where a value of True indicates that the element *should* take part in attention.
                A float mask of the same type as query, key, value that is added to the attention score.
            dropout_p (float): Dropout probability; if greater than 0.0, dropout is applied
            is_causal (bool): If true, assumes upper left causal attention masking and errors if both attn_mask and is_causal
                are set.
            scale (optional float): Scaling factor applied prior to softmax. If None, the default value is set
                to :math:`\frac{1}{\sqrt{E}}`.
            enable_gqa (optional bool): If set to True, Grouped Query Attention (GQA) is enabled, by default it is set to False.

        Returns:
            output (Tensor): Attention output; shape :math:`(N, ..., L, Ev)`.

        Raises:
            ValueError: If the causal bias variant is not a CausalVariant type.

        z.CausalBias should not be used with causal=TrueNT)r<   r=   r>   r?   r@      r   F)r>   return_debug_maskr?         )
biascu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr=   custom_mask_typecompute_log_sumexpr?   seqlen_kz<CausalBias.variant must be a CausalVariant type, but found: )
ValueErrorr    r!   r   r   r   Fscaled_dot_product_attentionr   r   r
   r   sizer   r+   nn
functionalpadopsaten#_scaled_dot_product_flash_attentionr   r   r   _efficient_attention_forward	transposeintr   r7   r&   )r9   r:   r;   r<   r=   r>   r?   r@   sdpa_paramsneeds_paddingog_head_sizeog_scaleoutrM   s                 r   	_dispatchzCausalBias._dispatch   s8   F  	OMNNN 9#777 M$<<<1#%	 	 	 	 -";;; UD)YPUVVV$sE4Iz K '{33 D %

2 2a 7$zz"~~+L%@@  X!H/33EAq5::b>>TUCU?U;VWWE(-11#1sxx||a?O;O7PQQC!H/33EAq5::b>>TUCU?U;VWWEinHH"&+" I    1lCCC*;77 %*"'sE:: .)-&y~BBOOAq))MM!Q''OOAq))!%!%!%!%'%():%;%;'9! C    Yq!__%  '{3335'44U\BB'#)	 	 	 	 byO`bb  r   r   c                 v    |i }|t           j        j        j        k    rt	          d           | j        |i |S )zjDefines the behavior of torch.nn.functional.scaled_dot_product_attention when the attn_bias is an AttnBiasNz5CausalBias only supports scaled_dot_product_attention)r+   rS   rT   rQ   NotImplementedErrorra   )clsfunctypesargskwargss        r   __torch_function__zCausalBias.__torch_function__  sO     >F58&CCC%G   s}d-f---r   c                 N    |                                                                  S N)r7   __repr__)r$   s    r   rl   zCausalBias.__repr__$  s       ""++---r   rk   )r8   FNF)r   N)r   r   r   r   r   r[   r%   r+   r&   Tensorr0   r4   r   r7   staticmethodfloatr.   ra   classmethodri   rl   r   r   r   r   r   V   s        4 # 3    (
%, 
5< 
 
 
 

5< 
EL 
 
 
 
- -8EL#9 -U\ - - - -(  !% m m|m\m |m  	m
 m m m m 
m m m \m^ . . . [.. . . . .r   r   r'   c                  |    t          |           dk    s
J d            | \  }}t          t          j        ||          S )a&  
    Creates an upper-left triangular causal bias.

    This function generates a upper-left triangular matrix to represent causal attention bias with a
    diagonal offset set so that the inclusive values are aligned to the upper left corner of the matrix.
    This equivalent to the `is_causal=True` argument in `scaled_dot_product_attention`.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        torch.tril(torch.ones(size, dtype=torch.bool))

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 0, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 0]]

    Args:
        size: The size of the bias matrix.

    Returns:
        CausalBias: The UPPER_LEFT triangular causal bias variant.
    rF   z*causal_upper_left only supports 2D tensors)lenr   r   r   rR   r    r!   s      r   r   r   (  s=    8 t99>>>G>>> Izm.	:FFFr   c                  |    t          |           dk    s
J d            | \  }}t          t          j        ||          S )a:  
    Creates a lower-right triangular causal bias.

    This function generates a lower-right triangular matrix to represent causal attention bias with a
    diagonal offset set so that the inclusive values are aligned to the lower right corner of the matrix.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        diagonal_offset = size[1] - size[0]
        torch.tril(
            torch.ones(size, dtype=torch.bool),
            diagonal=diagonal_offset,
        )

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 1, 0, 0],
         [1, 1, 1, 0],
         [1, 1, 1, 1]]

    Args:
        size: The size of the bias matrix.

    Returns:
        CausalBias: The LOWER_RIGHT triangular causal bias variant.
    rF   z+causal_lower_right only supports 2D tensors)rr   r   r   r   rs   s      r   r   r   I  s=    > t99>>>H>>> Izm/JGGGr   )!r   enumr   r   typingr   warningsr   r+   torch.nn.functionalrS   rT   rP   torch.backends.cudar   r   r	   r
   torch.nn.attentionr   torch.nn.attention._utilsr   r   r   r   __all___dynamoallow_in_graphr   rm   r   r   r   r   r   r   <module>r      s   I I                                          6 5 5 5 5 5            U
T
T   9 : : :   4 5 5 5   8 9 9 9   Z ( ( (2 2 2 2 2G 2 2 2jO. O. O. O. O. O. O. O.dG
 G G G GB!H !H !H !H !H !H !Hr   