    Χg              
       4   U d Z ddlZddlmZmZmZ ddlZg Zee         e	d<   dej
        defdZdej
        d	edej
        fd
Zdedee         defdZg dZdeeej        f         defdZ	 	 	 	 ddej
        dej
        dej
        deej
                 fdZdS )zCDefines utilities for interacting with scaled_dot_product_attention    N)ListOptionalUnion__all__tensorsreturnc                  4    t          d | D                       S )z0Returns True if any of the tensors requires gradc              3   $   K   | ]}|j         V  d S )N)requires_grad).0ts     U/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/nn/attention/_utils.py	<genexpr>z'_input_requires_grad.<locals>.<genexpr>   s$      001q000000    )any)r   s    r   _input_requires_gradr      s    00000000r   inpt_tensorog_sizec                 P    |                      d          |k    r| dd|f         S | S )z'Handles the unpad of the last dimension.N)size)r   r   s     r   _postprocess_flash_outputr      s4    w&&3=))r   head_dim_sizescalec                 8    ||S dt          j        |           z  S )z


def _calculate_scale(head_dim_size: int, scale: Optional[float]) -> float:
    """
    For FlashAttention we pad the head dimension to be a multiple of 8, so we
    need to scale the output by the original head size and not the padded one.
    """
    if scale is not None:
        return scale
    return 1.0 / math.sqrt(head_dim_size)
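

# NOTE (editorial addition, not part of the original module): a small worked
# example of the default SDPA scaling, 1 / sqrt(head_dim), which
# _calculate_scale applies only when no explicit scale is passed:
#
#     _calculate_scale(64, None)   # -> 0.125 (= 1 / sqrt(64))
#     _calculate_scale(64, 0.5)    # -> 0.5   (explicit scale takes precedence)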
                @         i   i   nc                     | t           v S )z:Returns true if the head dim is supported by FlexAttention)_SUPPORTED_HEAD_DIMS)r'   s    r   _supported_head_dimr*   %   s    $$$r           Fquerykeyvalue	attn_maskc           	      T   | j         |j         k    s| j         |j         k    r(t          d| j          d|j          d|j          d          | j        |j        k    s| j        |j        k    r(t          d| j         d|j         d|j         d          |                                 dk     s0|                                dk     s|                                dk     rOt          d	|                                  d
|                                 d|                                 d          d S )NzLExpected query, key, and value to have the same dtype, but got query.dtype: z, key.dtype: z, and value.dtype: z	 instead.zSExpected query, key, and value to have the same device type, but got query.device: z, key.device: z, and value.device: r   zUExpected query, key, and value to all be  at least 2 dimensional, but got query.dim: z, key.dim: z and value.dim: )dtype
ValueErrordevicedim)r,   r-   r.   r/   	dropout_p	is_causalr   s          r   _validate_sdpa_inputr7   *   so    {ci5;%+#=#=7$)K7 7>Ai7 7 %7 7 7
 
 	

 |sz!!U\U\%A%A9%*\9 9AD9 9!&9 9 9
 
 	

 yy{{Q#''))a--599;;??Yyy{{Y Y'*wwyyY YBG))++Y Y Y
 
 	
 ,;?r   )Nr+   FN)__doc__r   typingr   r   r   torchr   str__annotations__Tensorboolr   intr   floatr   r)   SymIntr*   r7    r   r   <module>rC      sq   I I I  ( ( ( ( ( ( ( ( ( (  c   15< 1D 1 1 1 1
5< # %,    *C * *E * * * * BAA %5el!23 % % % % % )-

 
<
	
 <
 %	
 
 
 
 
 
r   
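

# NOTE (editorial addition, not part of the original module): a hedged sketch
# of the validation above. Mixing dtypes across query/key/value is rejected
# before any kernel is selected:
#
#     q = torch.randn(2, 8, 128, 64)
#     k = torch.randn(2, 8, 128, 64, dtype=torch.float16)
#     v = torch.randn(2, 8, 128, 64)
#     _validate_sdpa_input(q, k, v)   # raises ValueError (dtype mismatch)
#
# With matching inputs the call returns None and is effectively a no-op:
#
#     _validate_sdpa_input(q, q, q)   # passes silently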