
     Ng                         d dl mZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZmZ d dlmZ  ee          Z G d	 d
          Z G d de
          ZdS )    )	getLogger)ListOptionalTupleUnionN)Fusion)AttentionMaskFormat)FusionUtilsNumpyHelper)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   F    e Zd ZdZdefdZdefdZd Zd Z	de
d	e
fd
ZdS )AttentionMask:
    Fuse Attention subgraph into one Attention node.
    modelc                     || _         i | _        i | _        t          |          | _        t
          j        | _        |                                | _	        d S N)
r   mask_indicemask_castedr
   utilsr	   MaskIndexEndmask_formatget_opset_versionopset_version)selfr   s     e/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_attention.py__init__zAttentionMask.__init__   sM    
 ''
.;"4466    r   c                     || _         d S r   )r   )r   r   s     r   set_mask_formatzAttentionMask.set_mask_format!   s    &r!   c                 R    || j         v r|| j         |         k    sJ || j         |<   d S r   )r   )r   mask
mask_indexs      r   set_mask_indicezAttentionMask.set_mask_indice$   s<    4###!1$!77777!+r!   c                 x    t          | j                  dk    sJ t          t          | j                            S Nr   )lenr   nextiter)r   s    r   get_first_maskzAttentionMask.get_first_mask)   s7    4#$$q((((D)**+++r!   inputreturnc           	      ^   | j         t          j        k    rd S || j        v r| j        |         S | j                            |          r| j                            |          \  }}n| j                            |          \  }}d}|r
|| j	        |<   | j         t          j
        k    r|| j        |<   |S | j                            d          }| j        dk     rwt          j        d|g|g| j                            dd                    }|j                            t          j        ddg          t          j        d	d
          g           nd}| j                            |          =| j                            t          j        |t*          j        dgdgd                     t          j        d||g|g| j                            dd                    }|j                            t          j        d	d
          g           | j                            |           || j        |<   |S )NTr&      	ReduceSumMaskReduceSuminputsoutputsnameaxes   keepdimsr   ort_const_1_reduce_sum_axesFr7   	data_typedimsvalsraw)r   r	   NoMaskr   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32r   r   create_node_namer   r   	make_node	attributeextendmake_attributeget_initializeradd_initializermake_tensorr   INT64add_node)r   r.   casted
input_name	cast_nodeoutput_namemask_index_node	axes_names           r   process_maskzAttentionMask.process_mask-   sW   29994D$$$#E** :&&u-- 	!%!E!Ee!L!LFJJ$(J$B$B5$I$I!J	F 	1&0DU# 2@@@&0DU# j11,??""$."|$Z00oNN	  O %,,f.CFQC.P.PRXRghrtuRvRv-wxxxx 6Iz)))44<
**&&"-"3SS!     %."I.$Z00oNN	  O %,,f.CJPQ.R.R-STTT
O,,,"-r!   N)__name__
__module____qualname____doc__r   r    r	   r#   r'   r-   strrU    r!   r   r   r      s         7i 7 7 7 7'+> ' ' ' ', , ,
, , ,8# 8# 8 8 8 8 8 8r!   r   c            )           e Zd ZdZdddddgfdededed	ee         d
edede	e
         f fdZdedeeef         fdZdedeeef         fdZdefdZde
fdZde
de
de
fdZde
de
de
e
ffdZde
de
de
fdZde
de
fdZded eedf         d!eedf         d"e
deedf         f
d#Zd$ed%ed&eded eedf         d!eedf         dedeedf         fd'Z	 	 	 	 	 	 	 d6d$ed%eee
df         d&eee
df         ded eedf         d!eedf         deded)e
d*e
de
de
de
d+e
d,e
d-edeedf         f"d.Z	 	 	 	 	 	 	 d7d/e
d$ed%ed&eded ed!ededed0e
d)e
d1e
de
de
d+e
d,e
d2ee         d3edeedf         f&d4Zd5 Z xZS )8FusionAttentionr   NFSkipLayerNormalizationLayerNormalizationr   hidden_size	num_headsattention_maskuse_multi_head_attention!disable_multi_head_attention_biassearch_op_typesc                    |rdnd}t                                          |||           || _        || _        |r|nt	          |          | _        || _        || _        d | _        d| _	        d| _
        d | _        d| _        d S )NMultiHeadAttention	AttentionT)superr    r`   ra   r   rb   rc   rd   mask_filter_valuenum_heads_warninghidden_size_warningshape_infershape_infer_done)
r   r   r`   ra   rb   rc   rd   re   attention_op_name	__class__s
            r   r    zFusionAttention.__init__m   s     5M]00R] 1?CCC&"0>XnnMRWDXDX(@%1R.!% "&#'  $r!   concatr/   c                    t          |j                  dk    r| j                            |j        d                   }| j                            |j        d                   }t	          |t
          j                  rI|j        dk    r>t	          |t
          j                  r$|j        dk    r|d         |d         |d         z  fS | j        | j	        fS )aU  
        Detect num_heads and hidden_size from Concat node in the following subgraph:

        SkipLayerNormalization or EmbedLayerNormalization
                        /        |
                     MatMul    Shape
                        |        |
                       Add     Gather(indices=0)
                        |        |
                        |      Unsqueeze
                        |        |
                        |     Concat (*, -1, 12, 64)
                        |     /
                       Reshape
                          |
                       Transpose
                 r9   r   )
r*   r.   r   get_constant_value
isinstancenpndarraysizera   r`   )r   rq   ra   	head_sizes       r   )get_num_heads_and_hidden_size_from_concatz9FusionAttention.get_num_heads_and_hidden_size_from_concat   s    $ v|!!
55fl1oFFI
55fl1oFFI9bj11ANa''y"*55 (Na'' |Yq\IaL%@@@~t///r!   	reshape_qc                 f   | j                             |j        d                   }|s| j                             |d          }| |j        dk    r|                     |          S t                              |j        d          d           | j        | j	        fS t          j        |          }t          |          dk    s|d         dk    s|d         dk    r,t                              d	| d
           | j        | j	        fS |d         }|d         }||z  }| j        dk    r?|| j        k    r4| j        r-t                              d| j         d| d           d| _        | j	        dk    r?|| j	        k    r4| j        r-t                              d| j	         d| d           d| _        ||fS )zDetect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        r9   NConcatz is not initializer.rs   rt   r   ru   zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   rJ   r.   
get_parentop_typer|   loggerdebugra   r`   r   to_arrayr*   rk   warningrl   )r   r}   q_shaperq   q_shape_valuera   r{   r`   s           r   get_num_heads_and_hidden_sizez-FusionAttention.get_num_heads_and_hidden_size   s    *,,Y_Q-?@@?Z**9a88F!fn&@&@EEfMMMLLIOA.DDDEEE>4#333#,W55}""}Q'71'<'<a@PTU@U@ULLp-pppqqq>4#333!!$	!!$	)+>A)t~"="=% /wwwU^wwwxxx).&aK43C$C$C' 1r(8rrkrrr   ,1(+%%r!   add_qkc                    | j         s'| j                            d          | _        d| _         | j        d S | j                            |j        d                   }| j                            |j        d                   }|| t                              d| d           d S ||k    r t                              d| d           d S |j        d         S )	NT)updater   r9   zone of the inputs of z is Nonezthe shape of two inputs of z is not same)rn   r   infer_runtime_shaperm   get_edge_shaper.   r   r   )r   r   input_0_shapeinput_1_shapes       r   get_add_qk_strzFusionAttention.get_add_qk_str   s    $ 	)#z==T=JJD$(D!#4(77QHH(77QHH M$9LLAAAABBB4M))LLKvKKKLLL4|Ar!   c                    dz   t          t          fd| j                            }t          |          dk    rS t          |          dk    sJ | j                            d          }t          j        dfdt          | j	                  D             g|d          }| j        
                    |           | j        | j        |<   S )N_maskc                 &    | j         d         k    S r)   )output)nodemask_output_names    r   <lambda>z0FusionAttention.reshape_add_qk.<locals>.<lambda>   s    t{1~AQ/Q r!   r9   r   r   c                     g | ]}S r[   r[   ).0_r   s     r   
<listcomp>z2FusionAttention.reshape_add_qk.<locals>.<listcomp>   s    :::qF:::r!   r5   r6   r7   axis)listfilternodes_to_addr*   r   rE   r   rF   rangera   appendthis_graph_namenode_name_to_graph_name)r   r   concat_nodeconcat_node_nameconcat_add_qk_fp32r   s    `   @r   reshape_add_qkzFusionAttention.reshape_add_qk   s     "G+ 6"Q"Q"Q"QSWSdeeff{q  ##;1$$$$:66x@@#-::::E$.$9$9:::%&!
 
 
 	  !34449=9M$%56r!   past_kpast_vc                     | j                             d          }| j                             d          }|dz                       dd          }|dz                       dd          }t          j        d|g|g|dg          }t          j        d|g|g|dg          }| j                            |           | j                            |           | j        | j        |<   | j        | j        |<   | j                             d          }	|                    dd	                              dd                              d
d          }
t          j        d||g|
g|	d          }| j                            |           | j        | j        |	<   |
S )zConcatenate past_k and past_v inputs to create past_kv input.

        Args:
            past_k (str): name of past K value
            past_v (str): name of past V value

        Returns:
            kv_output_name (str): name of past KV value
        	Unsqueeze_5d.r   r   )r5   r6   r7   r8   r   z.valuez.kv_value_kvr   )	r   rE   replacer   rF   r   r   r   r   )r   r   r   unsqueeze_k_nameunsqueeze_v_name	k_5d_name	v_5d_namek_5dv_5dr   kv_output_name	concat_kvs               r   r   zFusionAttention.concat_kv   s     :66{CC:66{CCe^,,S#66	e^,,S#66	8K!
 
 
 8K!
 
 
 	  &&&  &&&9=9M$%569=9M$%56  :66x@@%88@@cJJRRS[]bcc$y)#$!
 
 
	 	  +++9=9M$%56r!   c                    d}| j                             |          }|Wt          j        t	          j        dd| j         j        gd          |          }| j                             || j                   | j         	                    d          }| j         	                    d          }|d	z   
                    d
d          }|d	z   
                    d
d          }t          j        d||g|g|          }	t          j        d||g|g|          }
| j                            |	           | j                            |
           | j        | j        |<   | j        | j        |<   ||fS )ah  Reshape past_k and past_v from 4D to 3D to use as inputs for multihead attention node.

        Args:
            past_k (str): name of past K value of shape 4D
            past_v (str): name of past V value of shape 4D

        Returns:
            k_3d (str): name of past K value of shape 3D
            v_3d (str): name of past V value of shape 3D
        kv_4d_to_3dNr   int64dtyper7   Reshape_3dr   r   r4   )r   rJ   r   
from_arrayrx   arrayr`   rK   r   rE   r   r   rF   r   r   r   )r   r   r   new_dims_namenew_dimsreshape_k_namereshape_v_name	k_3d_name	v_3d_namek_3dv_3ds              r   
reshape_kvzFusionAttention.reshape_kv4  s    &:--m<<#.!R!78HHH}  H J&&x1EFFF44Y??44Y??e^,,S#66	e^,,S#66	M*K	
 
 
 M*K	
 
 
 	  &&&  &&&7;7K$^47;7K$^4)##r!   present_k_namepresent_v_namekv_nodec                 $   d\  }}| j                             |          }| j                             |          }|Jt          j        t	          j        dd          |          }| j                             || j                   |Jt          j        t	          j        dd          |          }| j                             || j                   | j                             d          }| j                             d          }	t          j
        d||g|g|d	          }
t          j
        d||g|g|	d	          }| j                            |
           | j                            |           | j        | j        |<   | j        | j        |	<   dS )
a?  Split kv_node containing present KV values into separate present K and present V values.

        Args:
            present_k_name (str): name of output to store present K value in
            present_v_name (str): name of output to store present V value in
            kv_node (str): name of present KV values
        )index_0index_1Nr   r   r   r   r9   Gatherr   )r   rJ   r   r   rx   r   rK   r   rE   r   rF   r   r   r   )r   r   r   r   k_indexv_indexk_dimv_dimgather_k_namegather_v_name	present_k	present_vs               r   split_kvzFusionAttention.split_kve  s    0
**733
**733= +BHQg,F,F,FWUUUEJ&&ud.BCCC= +BHQg,F,F,FWUUUEJ&&ud.BCCC 
33H==
33H==$W%#$
 
 
	 $W%#$
 
 
	 	  +++  +++6:6J$]36:6J$]333r!   c                    |dz                        dd          }|dz                        dd          }| j                            d          }| j                            d          }t          j        d|g|g|g d          }t          j        d|g|g|g d          }| j                            |           | j                            |           | j        | j        |<   | j        | j        |<   ||fS )a}  Transpose past_k and past_v from (B,N,P,H) to (B,P,N,H)

        Args:
            past_k (str): name of past K value of shape (B,N,P,H)
            past_v (str): name of past V value of shape (B,N,P,H)

        Returns:
            past_k_transpose (str): name of past K value of shape (B,P,N,H)
            past_v_transpose (str): name of past V value of shape (B,P,N,H)
        _transposedr   r   	Transpose)r   rt   r9   ru   )r5   r6   r7   perm)	r   r   rE   r   rF   r   r   r   r   )	r   r   r   past_k_transposepast_v_transposetranspose_k_nametranspose_v_nametranspose_ktranspose_vs	            r   transpose_kvzFusionAttention.transpose_kv  s$    #]2;;CEE"]2;;CEE:66{CC:66{CC&8%&!
 
 
 &8%&!
 
 
 	  ---  ---9=9M$%569=9M$%56!111r!   q_addk_addv_addname_prefixc                 D   | j                             |j        d                   p$| j                             |j        d                   }t          j        |          }t          j        |          }t          j        |          }|^| j                             |j        d                   p$| j                             |j        d                   }	t          j        |	          }|^| j                             |j        d                   p$| j                             |j        d                   }
t          j        |
          }t          j        |||fd          }dt          j        |j	                  z  }|dz   }| 
                    ||j        |g|           |S )Nr9   r   r   ru   	_qkv_biasr7   r=   r>   r?   )r   rJ   r.   r   r   rx   
zeros_likestackprodshaperK   r=   )r   r   r   r   r   q_biasqbkbvbk_biasv_biasqkv_biasqkv_bias_dim	bias_names                 r   create_combined_qkv_biasz(FusionAttention.create_combined_qkv_bias  s|    ++EKN;;itz?Y?YZ_ZefgZh?i?i!&))]2]2Z//A??m4:C]C]^c^ijk^lCmCmF%f--BZ//A??m4:C]C]^c^ijk^lCmCmF%f--B8RRLq1112728,,,+-	&	 	 	
 	
 	
 r!   q_matmulk_matmulv_matmulc                    | j                             d          }|j        d         |j        d         k    r|j        d         |j        d         k    sJ | j                             |j        d                   }	| j                             |j        d                   }
| j                             |j        d                   }t	          j        |	          }t	          j        |
          }t	          j        |          }|j        |j        k    r|j        |j        k    sJ |j        d         }t          j        |||fd          	                    |d|z  f          }|dz   }| 
                    ||	j        |j        d         |j        d         g|           |dz   }t          j        d|j        d         |g|g|	          }| j        | j        |<   |g}|d
z   }| 
                    |t           j        dgdgd           |dz   }| 
                    |t           j        dg|gd           |dz   }| 
                    |t           j        dgd|z  gd           |dz   }| 
                    |t           j        dgd|z  gd           |dz   }| 
                    |t           j        dgdgd           |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |} |}!|}"|                    |||g           | j        r|| j                             |j        d                   rdnd}#t          j        t	          j        | j                             |j        |#                                       r8||j        d|#z
  <   |} |                    |           | j        | j        |j        <   || j                             |j        d                   rdnd}#t          j        t	          j        | j                             |j        |#                                       r8||j        d|#z
  <   |}!|                    |           | j        | j        |j        <   || j                             |j        d                   rdnd}#t          j        t	          j        | j                             |j        |#                                       r8||j        d|#z
  <   |}"|                    |           | j        | j        |j        <   | j                            |           | |!|"fS )a  Create packed QKV MatMul node before MultiHeadAttention node.
           This is for the scenario where an Attention node should be created but cannot be created
           because past_key and past_value are separate inputs and not one concatenated input.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path
            num_heads (int): number of heads

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        MatMulr   r9   r   ru   _qkv_weightr   _qkv_outr4   _q_start_indexFr<   _k_start_index_v_start_indexrt   _end_of_qkv_index_qkv_last_axisr   _q_outSlice_k_out_v_out)r   rE   r.   rJ   r   r   r   rx   r   reshaperK   r=   r   rF   r   r   r   rM   r7   rH   rd   anyr   r   )$r   r   r   r  r   r   r   ra   matmul_node_nameq_weightk_weightv_weightqwkwvwd
qkv_weightqkv_weight_nameqkv_matmul_output
qkv_matmul	qkv_nodesq_slice_namek_slice_namev_slice_nameend_of_qkv_nameqkv_last_axis_nameq_slice_outputq_slicek_slice_outputk_slicev_slice_outputv_sliceq_outputk_outputv_outputinitializer_inputs$                                       r   create_packed_qkv_matmul_nodez-FusionAttention.create_packed_qkv_matmul_node  sP   4  :66x@@ ~a HN1$555(.:Kx~^_O`:`:`:`:` :--hnQ.?@@:--hnQ.?@@:--hnQ.?@@!(++!(++!(++x28##BH(<(<(<(<HQKXr2rl333;;QAJGG
*]: ("1%z'7':;	 	 	
 	
 	
" -z9%N1%7&'!	
 
 

 :>9M$%56L	 (*::,+:KSTRU]^\_ejkkk'*::,+:KSTRU]^\_ejkkk'*::,+:KSTRU]^ab]b\cinooo*-@@/[=NVWUX`ade`e_flqrrr-0@@"4@QYZX[cebflqrrr)H4"%|\CUV#$,,W55	
 
 
 6:5I$W\2)H4"%|\CUV#$,,W55	
 
 
 6:5I$W\2)H4"%|_FXY#$,,W55	
 
 
 6:5I$W\2'7G45551 	T )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< 	  +++8++r!    r   key_padding_maskr   r   
packed_qkvc           	         |dk    sJ |dk    r+||z  dk    r"t                               d| d|            dS t          d | j                                        j        D                       }| j                            d          }g }|rZ|                     |||||||          \  }}}|                    |j	        d         |j	        d         |j	        d         g           n0t          |          t          u rt          |          t          u r{| j        r:|                    |j	        d         |j	        d         |j	        d         g           n|                    |j	        d         |j	        d         |j	        d         g           nt          |          t          k    rot          |          t          k    rW||v rS||v rO| j        r$|                    |j	        d         ||g           n&|                    |j	        d         ||g           ndS | j        s.|                     ||||          }|                    |           n|                    d           |r|r|                    |
|||g           n|
s|r|                    |
|g           |	g}|r|r|                    ||g           t!          j        d|||	          }d
|_        |j                            t!          j        d|          g           |S )a[  Create a MultiHeadAttention node.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name of MHA
            key_padding_mask (str): name of key padding mask
            add_qk (str): name of add after Q x K'
            past_k (str): name of past K value - (batch_size, num_heads, past_sequence_length, head_size)
            past_v (str): name of past V value - (batch_size, num_heads, past_sequence_length, head_size)
            present_k (str): name of present K value - (batch_size, num_heads, sequence_length, head_size)
            present_v (str): name of present V value - (batch_size, num_heads, sequence_length, head_size)
            packed_qkv (bool): whether to combine MatMuls from Q, K, V paths
                               Note: This is for the scenario where an Attention node should be created but cannot be created
                               because past_key and past_value are separate inputs and not one concatenated input.

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nc                     g | ]	}|j         
S r[   r   )r   r   s     r   r   zCFusionAttention.create_multihead_attention_node.<locals>.<listcomp>  s     P P Pt P P Pr!   rh   r.  rg   r4   com.microsoftra   )r   r   setr   graphr.   rE   r-  rH   r   typer   rd   rZ   r   r   r   rF   domainrG   rI   )r   r   r   r  r   r   r   ra   r`   r   r/  r   r   r   r   r   r0  graph_input_namesmha_node_name
mha_inputsr$  r&  r(  r   mha_outputsmha_nodes                             r   create_multihead_attention_nodez/FusionAttention.create_multihead_attention_nodef  sb   Z 1}}}}??i 7A==LLikii^giijjj4 P Ptz7G7G7I7I7O P P PQQ
33K@@ 
 	(,(J(J(HeUE9) )%GWg w~a0'.2CW^TUEVWXXXX(^^y((T(^^y-H-H5 `!!5<?HOA4FUV"XYYYY!!8?1#5xq7I8?[\K]"^____NNc!!X#%%------5 L!!5<?Hh"GHHHH!!8?1#5x"JKKKK4 5 	"55eUE=YYIi((((b!!!  	:f 	:/HIIII 	: 	:/8999 h 	7 	7	95666# 	
 
 
 *!!6#8i#P#P"QRRRr!   r&   r.   
add_qk_strscalecausalc                    |dk    sJ |	dk    r+|	|z  dk    r"t                               d|	 d|            dS d}|||d}| j                            |j        d                   }| j                            |j        d                   }| j                            |j        d                   }d\  }}}|r| j                            |j        d                   p$| j                            |j        d                   }| j                            |j        d                   p$| j                            |j        d                   }| j                            |j        d                   p$| j                            |j        d                   }|r|r|r|sdS |t          |j        d          d	           dS t          j        |          }t          j        |          }t          j        |          }|j        |j        k    sJ |j        d         }|j        d         }|j        d         }||cxk    r|k    sn J |	dk    r'|	|k    r!t           	                    d
|	 d| d           d} |j        |j        k    rd} t          j        |j        dd                   }!t          j        |j        dd                   }"t          j        |j        dd                   }#d}$| r"t          j        |||fd          }%|!|"z   |#z   }$nt          j        |||fd          }%d|!z  }$|rt          j        |          }&t          j        |          }'t          j        |          }(t          j        |&j                  })t          j        |'j                  }*t          j        |(j                  }+|)|*cxk    r|!k    sn J |+|#k    sJ | r"t          j        |&|'|(fd          },|)|*z   |+z   }-nt          j        |&|'|(fd          },d|)z  }-| j                            d          }.| j        s#|                     |.dz   |j        ||$g|%           |r"|                     |.dz   |j        |-g|,           | j        rz|rt                               d           dS |j        d         |j        d         |j        d         |.dz   g}/||/                    |           t)          j        d|/|g|.          }0n9|
|.dz   |r|.dz   ndg}/||/                    |           n|/                    d           |o|}1|1r+|                     ||          }2|/                    |2           |A|                     |          }3|1s|/                    d           |/                    |3           |g}4|rl|rj|                    dd                              dd                              dd          }5|4                    |5           |                     |||5           t)          j        d|/|4|.          }0d|0_        |0j                            t)          j        d|          g           |r.|0j                            t)          j        dd          g           |.|0j                            t)          j        d|          g           | r1|0j                            t)          j        d|!|"|#g          g           | j        @|0j                            t)          j        d t?          | j                            g           |0S )!a+  Create an Attention node.

        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            q_add (NodeProto): Add bias node in fully connection for Q
            k_add (NodeProto): Add bias node in fully connection for K
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
            add_qk_str (str): name of Add node after Q x K'
            past_k (str): name of input for past K value
            past_v (str): name of input for past V value
            present_k (str): name of output to store present K value
            present_v (str): name of output to store present V value
            scale: scale before softmax
            causal: whether it is uni-directional mask.

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   r2  r3  NTFr9   )NNNzl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0r   ru   rh   r  r   r   zVMultiHeadAttention does not support relative_position_bias: cannot fuse the attention.rg   r4   r.  z.key_keyr   r   r5  ra   unidirectionalrA  qkv_hidden_sizesrj   ) r   r   r   rJ   r.   printr   r   r   r   rx   r   concatenater   rE   rc   rK   r=   r   r   r   rF   r   r   r   r   r9  rG   rH   rI   rj   float)6r   r&   r   r   r  r   r   r   ra   r`   r.   r   r@  r   r   r   r   rA  rB  has_biasr  r  r  r   r   r   r  r  r  
qw_in_size
kw_in_size
vw_in_sizeis_qkv_diff_dimsqw_out_sizekw_out_sizevw_out_sizeqkv_weight_dimr  r   r   r   q_bias_shapek_bias_shapev_bias_shaper   r   attention_node_nameattention_inputsattention_nodepast_existspast_kvr   attention_outputs
present_kvs6                                                         r   create_attention_nodez%FusionAttention.create_attention_node  s@   \ 1}}}}??i 7A==LLikii^giijjj4=U]u}H:--hnQ.?@@:--hnQ.?@@:--hnQ.?@@!1 	Z//A??m4:C]C]^c^ijk^lCmCmFZ//A??m4:C]C]^c^ijk^lCmCmFZ//A??m4:C]C]^c^ijk^lCmCmF  f  t>!$ g g g   4!(++!(++!(++ x28####Xa[
Xa[
Xa[
Z5555:555555??{j88NNJk J Jfp J J J  
 !8rx#
 gbhqrrl++gbhqrrl++gbhqrrl++ 	-R1===J(;6DNN2r2,Q777J_N 	0%f--B%f--B%f--B728,,L728,,L728,,L<>>>>;>>>>>>;.... 0>2r2,Q???+l:\I8RRLq999 </"j99+FF, 	  (=8", .1	 !     	  (;6 *"^	 !    ( 8	 uvvvt """#k1	  % ''
333#-$'(	  NN #m35=E#k112 
 % ''
3333 ''+++ +VK 1..88 ''000%#'#6#6z#B#B  # 0$++B/// ''(8999!' @Y @&..vr::BB62NNVVWZ\_``
!((444iJ???#-')(	  N !0 '')>{I)V)V(WXXX 	Z$++V-BCSUV-W-W,XYYY$++V-B7E-R-R,STTT 	$++&'9KVa;bccd   !-$++V-BCVX]^b^tXuXu-v-v,wxxxr!   c                    |}|j         dk    r#| j                            |dd          }||}nd S | j                            |g dg d          }d }|	|\  }}}	}
}n,| j                            |g dg d          }||\  }}}
}nd S g }t	          |j                  D ]7\  }}||vr
||d         j        d         k    r"|                    |           8t          |          dk    rd S |d         }	 | j                            |d	d          }|p||j        d                  }|6t          |          d
k    r#|d         }|j         dk    r|j        d         }nWd S |!t          |          dk    r|j        d         }n2d S |j         dk    r%||         }|D ]}|j         dk    r|j        d         }	 ||         }|j         dk    r%t          |j                  dk    r|j        d         }||         }d |D             }|	                    d          dk    rd S | j                            |g dg d          }|t                              d           d S |\  }}}}d}d}d}g dg dfg dg dfg dg dfg dg dfg dg dfd}d }|                                D ]H\  }} | j                            || d         | d                   }|0|dk    rd }|d!k    rd }|d"k    rd } |t                              d#           d S d }!d }"d }#|r|\  }}#}"}n|r|\  }}!}#}"n|r|\  }}}"n|\  }}!}}"| j                            |"g dg d$          }$|$>| j                            |"g d%g d&          }$|$t                              d'           d S |$d(         }%|$d)         }&|$d*         }'| j                            |"g dg d          }(|(>| j                            |"g d+g d,          }(|(t                              d-           d S |(d)         })|(d*         }*d }+d },|r6| j                            |#g d.g dfg d/g dfg d0g d1fg|          \  }}+}n|rg| j                            |#g d2g d1fg d/g dfg|          \  }}+}|!6|                     |!          },|,t                              d3|!            d S n1|rn.| j                            |!g d4g d5fg d6g d7fg|          \  }}+}|s|+t                              d8           d S |sTt          |+          dk    rA|+d         j         d	k    r0| j                            |+d                   \  }}-|-d9k    r|-| _        |j        d         |k    rn|'j        d         |k    r^|*j        d         |k    rN|s+| j                            |+d*         j        d                   nd }.||	n|
}/|                     |%          \  }0}1|0dk    s|1dk    rt                              d:           d S |                     |.|'|*||&|)||0|1||/j        d         |,          }2|2d S | j                            |2           | j        | j        |2j        <   ||j        d         }3d;|3z   }4|                     d<|3z   t6          j        dgt;          j        dd|0t?          |1|0z            g          d=          }5| j                             tC          j"        d>|/j        d         |5j        g|4gd?|3z             | j                   |4|j        d<   | j#        $                    |/|
|g           | j#        $                    |           | j#        $                    | j%        s|$n	|$d d*                    | j#        $                    | j%        s|(n	|(d d*                    | j#        $                    | j%        s|n	|d d*                    d | _&        d S d S d S d S )@Nr_   Addr   )r_  r  r   r   r  )NNr   r   r   )r_  Einsumr   r  )r9   Nr   r   r9   Mulrt      r^   rs   c                     g | ]	}|j         
S r[   )r   )r   childs     r   r   z(FusionAttention.fuse.<locals>.<listcomp>  s    >>>E%->>>r!   r  ru   )r   r   r_  r  )r9   r   r   Nz&fuse_attention: failed to match v pathF)Softmaxr_  Divr  )r   r   Nr   )re  r_  ra  r  )re  Wherer  rf  )r   r   rt   r   )re  r_  rg  r  )r   r   r   rt   )re  rf  r  )r   r   r   )path1path2path3path4path5rj  Trk  rl  z'fuse_attention: failed to match qk path)r   r   r   N)rf  r   r   r_  r  )r   r   r   r   Nz&fuse_attention: failed to match q pathr   )r   r   r   r_  r  )r9   r   r   r   Nz&fuse_attention: failed to match k path)Expandr   Equal)rp  r   r   )Castro  r   rp  )r   r   r   r   )rq  rp  r   r   z4fuse_attention: failed to verify shape inference of )ra  Subrq  r   r   )Nr   r9   r   r   )ra  rr  r   r   )Nr   r9   r   z)fuse_attention: failed to match mask pathizmFailed to detect num_heads and hidden_size for Attention fusion. Please specify those parameters in argument.edge_modified_shape_modified_tensorr<   r   reshape_modified_)'r   r   match_parentmatch_parent_path	enumerater.   r   r   r*   countr   r   itemsmatch_parent_pathsr   get_constant_inputrj   rb   rU   r   r   r]  r   r   r   r7   rK   r   rM   rx   r   intrN   r   rF   nodes_to_removerH   rc   prune_graph)6r   normalize_nodeinput_name_to_nodesoutput_name_to_node
start_nodeadd_before_layernormr  einsum_noder   reshape_qkvtranspose_qkv
matmul_qkvother_inputs_ir.   
root_inputmul_before_layernormmul_childrenlayernorm_nodechildrenrd  parent_nodechildren_typesv_nodesadd_vmatmul_v
is_distillis_distill_addis_no_mask_attentionqk_pathsqk_nodeskvr   	matmul_qkwhere_qkq_nodesr}   add_qmatmul_qk_nodesadd_kmatmul_k
mask_nodesr@  mul_valr&   attention_last_nodeq_num_headsq_hidden_sizenew_nodeunique_indexnew_edgeshape_tensors6                                                         r   fusezFusionAttention.fuse  s
    $
!%999#':#:#:>5RS#T#T #/1

 J00???!!!
 
	
  =F:Q;zz 
44DDDooo I $>G;K

":#344 	' 	'IB///	!+A...&&&&|!!F!!_
	  $z66z5!LL+./C/J1/MNL'C,=,=,B,B!-a!)-AAA!/!6q!9JJF)c,.?.?1.D.D18;

#';;;*:6H! 1 1=$888!&aJ	 **5":::s;CU?V?VZ[?[?[$+A.J&z2>>X>>>))Q..F*..z;d;d;dfufufuvv?LLABBBF")Auh
$999???K999???K;;;\\\J;;;\\\J222III>
 
 NN$$ 
	 
	DAqz33J!adKKHG||!
G||!%G||'+$LLBCCCF	 	1*2'Q)QQ 	1/7,Q))! 	1 (Q99(0%Q9*..y:c:c:cetetetuu?j22@@@""" G
 EFFFBK	2;*..y:c:c:cetetetuu?j22FFF""" G
 EFFF2; 

 %	#z<<333YYY?888)))D;;;\\\J
 $   Az11  	#z<<@@@,,,O888)))D $   Az1 !!0088
%LL!`X^!`!`aaaF! 	#z<< IHH*** >==O $
  
 Az1 $ 	
(:LLDEEEF# 	1J!(;(;
1@UY^@^@^66z!}EEJAw&  )0&>!
**x~a/@J/N/NS[SabcSdhrSrSrZnx,99*R.:Nq:QRRRtxJ1<1D++-)-)K)KI)V)V&Ka=A#5#5C    11#*1- H $$X...:>:ND(7&*03+l:#330<?)/1ac-+:U6V6V"WXX  4     
##$!,3A68IJ!
+l:	  (   (0!!$ '')<mZ(XYYY ''111  ''t7T(fZabecebeZfggg ''t7T(fZabecebeZfggg ''t7T(fZabecebeZfggg  $DG +*/N/NSrSrr!   )r.  r.  r.  r.  r.  r.  F)r.  r.  r.  r.  r.  NF)rV   rW   rX   rY   r   r}  r   r   boolr   rZ   r    r   r   r|   r   r   r   r   r   r   r   r   r   r-  r?  rI  r]  r  __classcell__)rp   s   @r   r]   r]   h   s         37).27&>@T%U% %% % 	%
 !/% #'% ,0% c% % % % % %40	 0eTWY\T\o 0 0 0 0>'&y '&U3PS8_ '& '& '& '&RY    * S        25 5S 5S 5 5 5 5n/$ /$c /$sCj /$ /$ /$ /$b+Ks +KC +K# +K +K +K +KZ%23 %2 %2 %2 %2 %2N Y_% Y_%	
  
y$	   <M,M, M, 	M,
 M, Y_%M, Y_%M, M, 
y$	M, M, M, M,t !# #m mm 	3,-m 	3,-	m
 m Y_%m Y_%m m m m m m m m m  !m" #m$ 
y$	%m m m mx !%'c cc c 	c
 c c c c c c c c c c c  !c" #c$ %c& 'c( 
y$	)c c c cJY$ Y$ Y$ Y$ Y$ Y$ Y$r!   r]   )loggingr   typingr   r   r   r   numpyrx   fusion_baser   fusion_optionsr	   fusion_utilsr
   r   onnxr   r   r   r   
onnx_modelr   rV   r   r   r]   r[   r!   r   <module>r     s9  
       / / / / / / / / / / / /           . . . . . . 1 1 1 1 1 1 1 1 = = = = = = = = = = = =            	8		S S S S S S S Slk$ k$ k$ k$ k$f k$ k$ k$ k$ k$r!   