
     Nge                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ  e j        e          Z G d de          Z G d	 d
e          ZdS )    N)OptionalUnion)FusionAttention)Fusion)FunctionProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                        e Zd ZdZdededef fdZ	 	 	 	 	 	 	 dded	ed
ededededededededede	e
         deedf         fdZd Zd Zd Z xZS )FusionRotaryAttentionze
    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
    modelhidden_size	num_headsc                 X    t                                          |||dg d           d S )NT)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationLayerNormalizationSkipLayerNormalizationAdd)use_multi_head_attentionsearch_op_types)super__init__)selfr   r   r   	__class__s       l/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr   zFusionRotaryAttention.__init__   sK     	%)   	 	
 	
 	
 	
 	
     Ninputoutputq_rotaryk_rotaryv_matmul	attn_maskadd_qkpast_kpast_v	present_k	present_vscalereturnc                 6   | j         dk    sJ | j        dk    r?| j        | j         z  dk    r,t                              d| j         d| j                     d S | j                            d          }|j        d         |j        d         |j        d         d||||	g}|g}|
r|r|                    |
|g           t          j	        d|||          }d|_
        |j                            t          j        d| j                   g           |.|j                            t          j        d	|          g           | j        @|j                            t          j        d
t          | j                            g           |                     d           |S )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads MultiHeadAttentionr    inputsoutputsnamecom.microsoftr   r,   mask_filter_value)r   r   loggerdebugr   create_node_namer"   extendr
   	make_nodedomain	attributemake_attributer5   floatincrease_counter)r   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   mha_node_name
mha_inputsmha_outputsmha_nodes                    r   create_mha_nodez%FusionRotaryAttention.create_mha_node)   s    ~!!!!aT%5%F1$L$LLL BD<L  B  Bquq  B  B   4
334HIIOAOAOA	

 h 	7 	7	95666# 	
 
 
 *!!6#8dn#U#U"VWWW%%v'<We'L'L&MNNN!-%%v'<=PRWX\XnRoRo'p'p&qrrr2333r   c	                 r
   | j                             |dgdg          }	| j                             |dgdg          }
|	|
dS |	d         |
d         }}| j                             |g dg d          }| j                             |g dg d          }| j                             |g dg d          }| j                             |g dg d          }||||dS |\  }}}|\  }}}|j        d         |k    s|j        d         |k    rdS |d         j        |j        k    s|d         j        |j        k    rdS | j                             |dgdg          }| j                             |dgdg          }||dS |d         |d         }}| j                             |g d	g d
          }| j                             |g dg d          }| j                             |g dg d          }| j                             |g dg d          }||||dS |d         j        |j        k    sB|d         j        |j        k    s,|d         j        |j        k    s|d         j        |j        k    rdS | j                             |dgdg          }|dS |d         }| j                             |g d	g d
          } | j                             |g dg d          }!| |!dS | d         j        |j        k    s|!d         j        |j        k    rdS | j                             |dgdg          }"|"dS |"d         }#| j                             |#g d	g d
          }$| j                             |#g dg d          }%|$|%dS |$d         j        |j        k    s|%d         j        |j        k    rdS |$d         }&| d         }'|d         }(|j        d         })|&j        d         |)k    s"|'j        d         |)k    s|(j        d         |)k    rdS | j                             |g dg d          }*| j                             |g dg d          }+|*|*\  }},}-n|+|+\  }}},}-ndS |-j        d         dvrdS | j                             |,g dg d          }.| j                             |-g dg d          }/| j                             |-dgdg          }0|.|/|0dS |.d         j        |/d         j        k    s|.d         j        |/d         j        k    rdS |/d         j        d         |0d         j        d         k    rdS dS )NConcat   Fr   	UnsqueezeGatherShaper   r   r   rG   r   r   )   r   r   )rI   MulrJ   rK   r   r   r   r   )rI   r   rJ   rK   rG   r   r   r   rN   )rN   r   r   r   rF   SlicerS   CastrF   rS   rS   >   r&   attention_mask)rN   r   rG   r   rI   T)r   match_parent_pathr!   r3   r"   )1r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2reshape_v_1r'   
root_inputconcat_qkv_2_pathconcat_qkv_1_pathconcat_qkv_2concat_qkv_1reshape_qkv_2_path_1reshape_qkv_2_path_2reshape_qkv_1_path_1reshape_qkv_1_path_2_gather_1shape_1gather_2shape_2concat_v_2_pathconcat_v_1_path
concat_v_2
concat_v_1reshape_v_2_path_1reshape_v_2_path_2reshape_v_1_path_1reshape_v_1_path_2concat_k_2_path
concat_k_2reshape_k_2_path_1reshape_k_2_path_2concat_q_2_path
concat_q_2reshape_q_2_path_1reshape_q_2_path_2mul_qmul_kmul_vgather_1_outattn_mask_path_1attn_mask_path_2
slice_qk_2
slice_qk_1slice_qk_2_pathslice_qk_1_path_1slice_qk_1_path_2s1                                                    r   &check_runtime_shape_paths_for_functionz<FusionRotaryAttention.check_runtime_shape_paths_for_functiona   s    !J88
UVTWXX J88
UVTWXX$(9(A5%6q%9;LQ;Ol#z;;LJjJjJjlululuvv#z;;LJjJjJjlululuvv#z;;LJjJjJjlululuvv#z;;LJjJjJjlululuvv (#+#+#+538W38W =z))W]1-=-K-K5  "'8=88<PQR<S<X\d\i<i<i5 *66{XJQRPSTT*66{XJQRPSTT"o&=5!0!3_Q5GJ
!Z99???
 
 "Z99???
 
 "Z99*FfFfFfhqhqhqrr!Z99*FfFfFfhqhqhqrr&!)!)!)5 q!&(-77!!$)X]::!!$)X]::!!$)X]::5 *66{XJQRPSTT"5$Q'
!Z99???
 
 "Z99???
 
 %);)C5 a %66:LQ:O:TX`Xe:e:e5 *66{XJQRPSTT"5$Q'
!Z99???
 
 "Z99*FfFfFfhqhqhqrr%);)C5 a %66:LQ:O:TX`Xe:e:e5 #1%"1%"1%q);q>\))U[^|-K-Ku{[\~amOmOm5  :77@\@\@\^g^g^ghh:77@d@d@dfrfrfrss'(8%Az::)+;(Aq*jj5A&EEE5*66???
 
 !J88???
 
 !J88k]UVTWXX"&7&?CTC\5 1"&7&:&????STCUCZ^opq^r^wCwCw5 Q%a(,=a,@,Fq,III5tr   c                    | j                             |dgdg          }|dS |d         }| j                             |g dg d          }| j                             |g dg d          }	||	dS |\  }
}}|	\  }
}}|j        d         |k    s|j        d         |k    rdS | j                             |dgdg          }|dS |d         }| j                             |g dg d          }| j                             |g dg d          }||dS |d         j        |j        k    s|d         j        |j        k    rdS | j                             |dgdg          }|dS |d         }| j                             |g dg d          }| j                             |g dg d          }||dS |d         j        |j        k    s|d         j        |j        k    rdS | j                             |dgdg          }|dS |d         }| j                             |g dg d          }| j                             |g dg d          }||dS |d         j        |j        k    s|d         j        |j        k    rdS dS )	NrF   rG   Fr   rH   rL   rM   T)r   rW   r!   r3   )r   reshape_qkv	reshape_q	reshape_k	reshape_vr^   concat_qkv_path
concat_qkvreshape_qkv_path_1reshape_qkv_path_2rg   rh   ri   rj   rk   concat_v_pathconcat_vreshape_v_path_1reshape_v_path_2concat_k_pathconcat_kreshape_k_path_1reshape_k_path_2concat_q_pathconcat_qreshape_q_path_1reshape_q_path_2s                              r   #check_runtime_shape_paths_for_nodesz9FusionRotaryAttention.check_runtime_shape_paths_for_nodes   s8    *66{XJQRPSTT"5$Q'
!Z99*FfFfFfhqhqhqrr!Z99*FfFfFfhqhqhqrr%);)C518W18W =z))W]1-=-K-K5 
44Y
QCPP 5 #:77BbBbBbdmdmdmnn:77BbBbBbdmdmdmnn#'7'?5 A#x}448H8K8PT\Ta8a8a5 
44Y
QCPP 5 #:77BbBbBbdmdmdmnn:77BbBbBbdmdmdmnn#'7'?5 A#x}448H8K8PT\Ta8a8a5 
44Y
QCPP 5 #:77BbBbBbdmdmdmnn:77BbBbBbdmdmdmnn#'7'?5 A#x}448H8K8PT\Ta8a8a5tr   c                 p    |j         dvrd S d } j                            |g dg d          } j                            |g dg d          } j                            |g dg d          }||\  }}	}}
}|}n5|
|\  }}}}|}n)||\  }}}}}|}nt                              d           d S d\  }}}d }d } j                            |g d	g d
          } j                            |g dg d          } j                            |g dg d          } j                            |g dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg	d           \  }}} j                            |g d g d!          }|}|\  }}}}}}|} j                            |d"d#gd$d%g          }|t                              d&           d S |d$         j        d$         }|d'         j        d$         }|j        d$         }n|$|\  }}}}|}|j        d$         }|j        d$         }n||\  }}}|}|j        d$         }n|Et          |          d(k    r2|d$         d)d          \  }}}}|}|j        d$         }|j        d$         }nE|'|\  }}}}}|}|}|j        d$         }|j        d$         }nt                              d*           d S  j                            |g d+g d,          }d-\  }} ||\  }}}} nt                              d.           d S d/\  }!}" j                            |g d0g d          }# j                            |g d1g d          }$ j                            |g d2g d3          }% j                            |g d4g d5          }& j                            |g d6g d7          }' j                            |g d8g d3          }( j                            |g d9g d:          })|#|#\  }}*}+|*j        d$         }!n|$|$\  }}}*}+|*j        d$         }!n|%' 	                    |%d$         j        d$                   }"n|&' 	                    |&d$         j        d$                   }"nq|'|'d$         j        d$         }"n[|(|(d$         j        d$         }"nE|)' 	                    |)d$         j        d$                   }"nt                              d;           d S d/\  },}-d }.d }/d }0 j                            | g d<g d
          }1 j                            | g d=g d          }2 j                            | g d>g d?          }3 j                            | g d@g d:fg dAg dBfg dCg dDfg dEg dFfg dGg dHfg dIg dJfg dKg dLfg dIg dMfg dIg dNfg	d           \  }}4} j                            | g dOg dP          }5|1|1\  }6}}7}}8}9|1}. j                            |7d"d#gd$d%g          }:|:t                              dQ           d S |:d$         j        d$         },|:d'         j        d$         };|7j        d$         }-||;k    sJ n|2|2\  }}8}}<}9|2}.|8j        d$         }-n|3&|3\  }}7}8}}<}9|3}.|7j        d$         },|7j        d$         }-n|4Vt          |4          d(k    rC|4d$         dRd          \  }<}9|4d$         dSdT         \  }7}8|4}.|7j        d$         },|7j        d$         }-nG|5)|5\	  }}7}0}8}/}}<}}9|5}.|7j        d$         },|7j        d$         }-nt                              dU           d S d }=d }>d }? j                            | g dVg d,          }@ j                            | g dWg d,          }A j                            | g dXg dY          }B|@
|@\  }C}}D}E|@}=n7|A
|A\  }D}}F}E|A}=n+|B|B\  }?}D}>}}F}}E|B}=nt                              dZ           d S |Ej        d$         |9j        d$         k    r8|9j        d$         |j        d$         k    rt                              d[           d S d\}G||k    rQ 
                    |	|
|C|6||||Ej        d$                   st                              d]           d S |	j        d$         }Gn|||fv r                     ||F|<||Ej        d$                   st                              d]           d S |j        d$         }G|>r|>j        d$         n|Ej        d$         |Dj        d$<   |/r|/j        d$         n|9j        d$         |8j        d$<   |?|8j        d^z   |8j        d$<   ||k    r
|d_d          } fd`}H|?r|0r j                            da          }I|Id^z   }Jt          j        da|0j        d$         g|Jg|Ib          }K|Kj                            t          j        dcg dd          g            j                            da          }L|Ld^z   }Mt          j        da|?j        d$         g|Mg|Lb          }N|Nj                            t          j        dcg dd          g            |H|<          }O|Ot                              de           d S  j                            dfdgh          }Pt          j        df|Kj        d$         |Oj        d$         g|Pd^z   g|Pb          }Q j                            dfdih          }Rt          j        df|Nj        d$         |Oj        d$         g|Rd^z   g|Rb          }S|Q}8|S}D j                            |O            j                            |K            j                            |N            j                            |Q            j                            |S            j         j        |Oj        <    j         j        |Kj        <    j         j        |Nj        <    j         j        |Qj        <    j         j        |Sj        <                        |Ej        d$         |G|D|8||!|"|,||-|          }T|Tt                              dj           d S  j                            |T            j         j        |Tj        <    j                            |d_d                     ||k    r/ j                            |
|d d'         n	|d dR                    n*|d$         d'         g}U|D ]}V                     |V|U            j                            |           |.|1k    r$ j                            |.d dR                    ns|.|2k    rb j                            |.d$                     j                            |.d%                     j                            |.dk                    n|.|3k    r j                            |.d$                     j                            |.d_                     j                            |.dk                     j                            |.dl                    n|.|5k    rA j                            |.d$                     j                            |.d_                    n=|.|4k    r7|.d$         d'         |.d$         d)         g}U|.D ]}V                     |V|U           |=|@k    r# j                            |=d dR                    nF|=|Ak    r@ j                            |=d_                     j                            |=d%                    dm _        d S )nN>   r   r   r   )MatMulReshape	Transposer   r   rG   r   r   r   r   )r   r   r   r   rQ   )	AllReducer   r   r   r   z0fuse_rotary_attention: failed to match qkv nodes)r    r    r    )r   r   rF   r   r   r   )rG   r   r   rG   r   r   )rF   r   r   r   )rG   rG   r   r   )r   r   r   rM   )r   ExpandrI   rF   r   r   r   )rG   r   r   r   rG   r   r   )r   r   WhereEqualr   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   r   r   r   r   r   r   r   rG   r   r   )r   r   r   r   rO   ConstantOfShaperK   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   r   rG   r   r   r   r   rG   r   r   r   rG   r   r   )r   r   r   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   rG   r   r   r      r   r   r   rG   r   r   )r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   rN   r      r   r   r   rG   r   r   )	r   rF   rI   rJ   rK   rF   r   r   r   )	rG   rG   r   r   r   r   rG   r   r   )
r   rF   rI   rO   rJ   rK   rF   r   r   r   )
rG   rG   rG   r   r   r   r   rG   r   r   )	rG   rG   rN   r   r   r   rG   r   r   )	rG   rG   r   r   r   r   rG   r   r   )output_name_to_node)rF   r   r   r   r   )rG   rG   r   r   rG   rS   rI   r   rN   zDfuse_rotary_attention: failed to match past/present concat in v path	   z-fuse_rotary_attention: failed to match v path)Softmaxr   Divr   rP   NNz/fuse_rotary_attention: failed to match qk nodes)r    r    rR   rT   )r   r   SubrU   r   rI   rI   )rG   r   rN   rG   r   r   r   )r   r   rU   r   rI   rI   )rG   rN   rG   r   r   r   )r   r   r   r   rU   r   rI   rI   )rG   r   r   rN   rG   r   r   r   )r   r   r   rU   r   rI   rI   )	r   rU   r   rU   r   rU   r   rI   rI   )	rG   r   r   r   r   rG   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r   r   rF   r   RotaryEmbeddingr   )r   r   r   r   r   )r   rF   r   r   r   r   )rG   r   rG   r   r   r   )	r   r   r   rI   rF   r   r   r   r   )r   r   r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   r   r   r   r   r   r   r   rG   r   r   r   )r   r   r   r   r   rO   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   r   rG   r   r   r   r   rG   r   r   r   rG   r   r   r   )r   r   r   r   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   rG   r   r   r   r   r   r   r   rG   r   r   r   )r   r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   rN   r   r   r   r   r   rG   r   r   r   )r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   rG   r   r   r   r   rG   r   r   r   )r   r   rF   rI   rO   rJ   rK   rF   r   r   r   r   )rG   r   rG   rG   r   r   r   r   rG   r   r   r   )rG   r   rG   rN   r   r   r   rG   r   r   r   )rG   r   rG   r   r   r   r   rG   r   r   r   )	r   rF   rF   r   rS   r   r   r   r   )	rG   r   rG   r   r   r   r   r   rG   zDfuse_rotary_attention: failed to match past/present concat in k pathz.fuse_rotary_attention: failed to match k nodes)r   r   r   r   )r   r   r   r   )rF   r   rS   r   r   r   r   )r   r   r   r   r   r   rG   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr    z;fuse_rotary_attention: failed to verify runtime shape paths	_output_0rG   c                 0   
j                             | dd          }|t                              d           dS 
j                             |j        d                   }
j                             |j        d                   }||t                              d           dS |d         }|d         }||z  }
j                             d	d
          }
j                             |          &
                    |t          j
        dg|gd           
j                             dd          }t          j        d|j        d         |j        d         |g|dz   g|          }	|	j                            t          j        dd          g           |	S )zDetect num_heads and hidden_size for ONNX model from phi-2
            Args:
                reshape_q (NodeProto): reshape node for q
            Returns:
                hidden_size_concat_node(NodeProto): Concat node to be used by reshape
            rF   rG   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qrN   r   zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   Initializerr   name_prefixF)r3   	data_typedimsvalsrawhidden_size_concatoutput_0r0   axis)r   match_parentr6   r7   get_constant_valuer!   r8   get_initializeradd_initializerr	   INT64r
   r:   r<   r9   r=   )r   concatnum_head_constant_nodehead_size_constant_nodenum_head_valuehead_size_valuer   hidden_size_initilizerhidden_size_reshape_node_namehidden_size_concat_noder   s             r   create_hidden_size_concat_nodezBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_node  s    Z,,Y!DDF~deeet &*Z%B%B6<PQ?%S%S"&*j&C&CFLQRO&T&T#%-1H1Plmmmt3A6N5a8O(?:K%)Z%@%@\i%@%j%j"z))*@AAI$$/)/% %    -1J,G,G^r,G,s,s)&,&6LOLO*
 7CD2	' 	' 	'# $-44f6KFTU6V6V5WXXX**r   r   r0   perm)r   rN   rG   r   z?fuse_rotary_attention: failed to create hidden_size_concat_noder   concat_k_halfr   concat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsr   r   T)op_typer   rW   r6   r7   match_parent_paths_allr!   r"   lenreshape_add_qkr   r   r3   r8   r
   r:   r<   r9   r=   nodes_to_addappendthis_graph_namenode_name_to_graph_namerD   nodes_to_remove&add_nodes_to_remove_with_nodes_to_keepprune_graph)Wr   normalize_nodeinput_name_to_nodesr   	qkv_nodesqkv_nodes_1qkv_nodes_2qkv_nodes_3rg   rX   rY   
matmul_qkvr   r)   r+   past_seq_lenv_nodesadd_v	v_nodes_1	v_nodes_2	v_nodes_3	v_nodes_4	v_nodes_5r\   r   r]   matmul_vr   transpose_vr   qk_nodesr'   	matmul_qkr&   
add_qk_strattn_mask_nodes_1attn_mask_nodes_2attn_mask_nodes_3attn_mask_nodes_4attn_mask_nodes_5attn_mask_nodes_6attn_mask_nodes_7slice_mask_1slice_mask_2r(   r*   k_nodesslice_kr   	k_nodes_1	k_nodes_2	k_nodes_3	k_nodes_4	k_nodes_5r[   r   rotary_kmatmul_kr   shared_past_seq_lenr   q_nodesslice_qr   	q_nodes_1	q_nodes_2	q_nodes_3rZ   rotary_qmatmul_qr   root_outputr   k_transpose_node_namek_tranpose_output_namek_transpose_nodeq_transpose_node_nameq_tranpose_output_nameq_transpose_noder   concat_k_reshape_node_nameconcat_k_reshape_nodeconcat_q_reshape_node_nameconcat_q_reshape_nodenew_nodenodes_to_keep	temp_pathsW   `                                                                                      r   fusezFusionRotaryAttention.fuseF  sF   !)nnnF
 	j22CCCOO
 

 j22888LL
 

 j22EEEOO
 

 "=H:A}a
#II$,7)A{Az#II$/:,Aq+q*#IILLKLLLF +5'	<J00PPP
 
	
 J00888LL
 
	
 J00...II
 
	
 *;; cbb)))
   <;;!&  $ EDD',    ?>>#(   988$ vuu///
   322  vuu///
 vuu///OkX !%] < o
 o
9a` J00???OO
 
	
  AJ>KHahG J88+&A M
 $cddd"1%+A.F(,215L *II"9B6Hk9hG^A&F *II"/8,KHG#*1-II"s9~~':':9B1bcc9J6Hk9hG^A&F *II"@I=Hk9eXHG^A&F *IILLHIIIF://///LL
 

 '	&.#Avq))LLJKKKF !'	: J88(((II
 

 !J88000LL
 

 !J88OOO!!!
 

 !J88HHH
 

 !J88YYY$$$
 

 !J88RRR!!!
 

 !J88aaa'''
 

 (,=)A|\$+A.II*/@,Aq,$+A.II*,,->q-A-H-KLLJJ*,,->q-A-H-KLLJJ**1-4Q7JJ**1-4Q7JJ*,,->q-A-H-KLLJJLLVWWWF
 #	J00XXX
 
	
 J00NNNOO
 
	
 J00XXX
 
	
 *;;
 
 
 0//  " BAA%*  ( KJJ+0  $ EDD',    ?>>#(   655"   988$   655"   655gcH !%M < g
 g
9aP J00rrr'''
 
	
  >G;KHa8G J88+&A M
 $cddd"1%+A.F"/"3"9!"< *I#666666"2;/AxIxG *II"<E9Ax1iG^A&F *II"s9~~':':"+A,rss"3Ix!*1be!4HhG^A&F *II"W`TAx'1iHG^A&F *IILLIJJJF
 J00AAALL
 
	
 J00AAALL
 
	
 J00[[[!!!
 
	
  1:.KHhGG"/8,HaHGG"JSGM8WaAxGGLLIJJJF>!q 111hnQ6G8>Z[K\6\6\LLfgggF##>>q!	 	  Z[[['.q1KK;444;;q!   Z[[[%,Q/K
 6= Tq 1 1(/RSBTHN15< Tq 1 1(/RSBTHN1 $%-][%@"K''%abbM	2	+ 2	+ 2	+ 2	+ 2	+j  =	\] =	\$(J$?$?$L$L!%:[%H"%/%,Q/0/0*	      &--v/DV\\\/Z/Z.[\\\ %)J$?$?$L$L!%:[%H"%/%,Q/0/0*	      &--v/DV\\\/Z/Z.[\\\&D&DY&O&O#&.^___ *.)D)DY\k)D)l)l&$*$4(/24K4RST4UV3kAB/	% % %! *.)D)DY\k)D)l)l&$*$4(/24K4RST4UV3kAB/	% % %! -H,H$$%<===$$%5666$$%5666$$%:;;;$$%:;;;IMI]D()@)EFBFBVD()9)>?BFBVD()9)>?GKG[D()>)CDGKG[D()>)CD''N1
 
 LLnoooF  ***6:6J$X]3##IabbM222i ''7SVTVSV<XXXX$QZ^,M$ V V	;;I}UUUU##H---i ''5555	!! ''
333 ''
333 ''
3333	!! ''
333 ''
333 ''
333 ''
3333	!! ''
333 ''
3333	!!$QZ^WQZ^<M$ V V	;;I}UUUUi ''5555	!! ''
333 ''
333r   )r    r    r    r    r    r    N)__name__
__module____qualname____doc__r   intr   strr   r   r>   r   rD   r   r   r  __classcell__r   s   @r   r   r      s]        

 
 	
 
 
 
 
 
6 !%6 66 6 	6
 6 6 6 6 6 6 6 6 6 
y$	6 6 6 6pZ Z ZxG G GRK  K  K  K  K  K  K r   r   c            
       b     e Zd Zdef fdZdedefdZdefdZde	d	e	d
e	de	de	f
dZ
d Z xZS )FusionRotaryEmbeddingsr   c                     d| _         t                                          || j         | j         | j         dz   dg           d S )Nr   z.1r   )	base_namer   r   )r   r   r   s     r   r   zFusionRotaryEmbeddings.__init__U  s@    *RVAVX]0^_____r   rot_emb_nodefunctionc                    g g }}|j         D ]}|j        dk    r|j        g k    r{|j        d         |j        v rg|                    |           t          |j                                      |j        d                   }|                    |j        |                    g }|D ]g}|j        d         j        }	| j	        
                    d          |	_        | j	                            |	           |                    |	j                   ht          ||          D ]T\  }
t          t          fd| j	        j	        j        j                             }|D ]}t!          j        ||
           U|S )NConstantr   c                     | j         v S N)r!   )entryextra_outputs    r   <lambda>z?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>o  s    8S r   )noder   r!   r"   r   listindexr<   tr   r8   r3   r   zipfiltergraphr   replace_node_input)r   r'  r(  extra_constantsextra_outputsfn_nodeoutput_indexextra_initializersextra_constantconstant_tensorprotoextra_initializernodes_to_updatenode_to_updater.  s                @r   reassign_extra_outputsz-FusionRotaryEmbeddings.reassign_extra_outputs\  s   )+R} 	H 	HG*,,"1D1DXYIZ^f^mImIm&&w///#HO44::7>!;LMM$$\%8%FGGG  - 	A 	AN#1#;A#>#@ (,
(C(CJ(O(O %J&&';<<<%%&:&?@@@@ 03=BT/U/U 	^ 	^+L+"6*S*S*S*SUYU_UeUkUp#q#qrrO"1 ^ ^,^\K\]]]]^ r   r0  c                    | j                             | j                  }| j                             ddgddg          }||\  }}nt                              d           d S |j        d         j        d         g}t          t          fd| j         j         j
        j                            }t          t          fd| j         j         j
        j                            }d\  }	}
t          |          dk    rt          |          dk    r| j                             |	          | j                             |
          t          j        |d         j        d         j                                                  }t          j        |d         j        d         j                                                  }t'          j        |	t*          j        t          |j                  |                                                                	          }| j                             || j                   t'          j        |
t*          j        t          |j                  |                                                                	          }| j                             || j                   | j                            |d         |d         g           |                    |	|
g           j        }t          |          dk    rt          t          fd
| j         j         j                            }t          |          dk    sJ |                     |d                   t          t          fd|                    }t          |          dk    sJ t'          j         | j        |||d          }d|_!        | j        "                    |           |S )Nr   r   r   z.fuse_rotary_embeddings: failed to match MatMulrG   c                 <    | j         d         j        d         k    S )Nr   rN   r"   r!   constantr0  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>      hoa6HDJWXM6Y r   c                 <    | j         d         j        d         k    S )Nr   r   rE  rF  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  rH  r   	cos_cache	sin_cacher3   r   r   r   c                 $    | j         j        k    S r,  )r3   r   )fnr0  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    "'T\*A r   c                     | vS r,   )output_namer9  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    TaAa r   r1   r2   r3   interleavedr4   )#r   r8   r&  rW   r6   r7   r"   r!   r1  r5  r6  r0  r   r   r   to_arrayr<   r3  squeezer
   make_tensorr	   FLOATshapeflattentolistr   r   r   r9   	functionsrB  r:   r;   r   )r   r0  rotary_emb_node_namematmul_pathreshape_nodematmul_noderotary_emb_inputscos_cache_nodesin_cache_nodecos_cache_namesin_cache_namerK  rL  cos_cache_tensorsin_cache_tensorrotary_emb_outputsfuncrotary_emb_noder9  s    `                @r   &create_rotary_embeddings_from_functionz=FusionRotaryEmbeddings.create_rotary_embeddings_from_functionu  s   #z::4>JJj22!F
 

 "(3%L++LLIJJJF q!JqM
 f%Y%Y%Y%Y[_[e[k[q[vwwxxf%Y%Y%Y%Y[_[e[k[q[vwwxx)A& 1$$N##q((
**>::B
**>::B$-nQ.?.I!.L.NOOWWYYI$-nQ.?.I!.L.NOOWWYYI%1#%+)/**&&((//11	      J&&'79MNNN%1#%+)/**&&((//11	      J&&'79MNNN ''):N1<M(NOOO  ..!ABBB![!""Q&&AAAA4:CSC]^^__Dt99>>>> 77d1gFFM!%f-a-a-a-acu&v&v!w!w)**a//// *N$&%
 
 
 "1##L111r   r^   position_ids	cos_slice	sin_slicer"   c                    | j                             | j                  }t          t	          fd| j         j         j        j                            }t          t	          fd| j         j         j        j                            }d\  }	}
t          |          dk    r	t          |          dk    r| j                             |	          | j                             |
          t          j
        |d         j        d         j                                                  }t          j
        |d         j        d         j                                                  }|j        d         }|d d d |dz  f         }|d d d |dz  f         }t          j        |	t"          j        t          |j                  |                                                                          }| j                             || j                   t          j        |
t"          j        t          |j                  |                                                                          }| j                             || j                   | j                            |d         |d         g           t          j        | j        |||	|
g|g|d          }d	|_        |S )
Nc                 &    | j         d         k    S Nr   r"   )rG  rm  s    r   r/  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>      hoa6HI6U r   c                 &    | j         d         k    S rq  rr  )rG  rn  s    r   r/  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>  rs  r   rJ  rG   r   rN   rM  rS  r4   )r   r8   r&  r1  r5  r6  r0  r   r   r   rU  r<   r3  rV  rY  r
   rW  r	   rX  rZ  r[  r   r   r   r9   r:   r;   )r   r^   rl  rm  rn  r"   r]  rb  rc  rd  re  rK  rL  	head_sizerf  rg  rj  s      ``            r   #create_rotary_embeddings_from_nodesz:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodes  s     $z::4>JJ f%U%U%U%UW[WaWgWmWrssttf%U%U%U%UW[WaWgWmWrsstt)A& 1$$N##q((
**>::B
**>::B$-nQ.?.I!.L.NOOWWYYI$-nQ.?.I!.L.NOOWWYYI "*I!!!!%7	Q%7"78I!!!!%7	Q%7"78I%1#%+)/**&&((//11	      J&&'79MNNN%1#%+)/**&&((//11	      J&&'79MNNN ''):N1<M(NOOO *NnnMH%
 
 
 "1r   c                   % | j         |j        vr|j        dk    rd S d %|j        dk    rt          |j                  dvs|j        d         dvrt                              d           d S |                     |          %%t                              d           d S | j                            |           t          t          %fd| j        j        j        j                            }t          |          dk    sJ | j        j        j        j                            |d                    nz| j                            |g d	g d
          }| j                            |g dg d
          }|p|}| j                            |g dg d          }| j                            |g dg d          }	|p|	}
||
t                              d           d S | j                            |g dg d          }| j                            |g dg d          }|p|}| j                            |g dg d          }| j                            |g dg d          }|p|}||t                              d           d S |d         j        |d         j        k    sT|d         j        |
d         j        k    s8|d         j        |d         j        k    s|d         j        |
d         j        k    rt                              d           d S | j                            |ddgddg          }| j                            |ddgddg          }|p|}|t                              d           d S d\  }}}| j                            |g dg d          }| j                            |g d g d!          }| j                            |g d"g d#          }| j                            |g d$g d%          }||}|d&         j        d         }n||}|d'         j        d         }nr|)|}|d&         j        d         }|d(         j        d         }nG|)|}|d'         j        d         }|d(         j        d         }nt                              d)           d S d*\  }}| j                            |g dg d+          }| j                            |g d g d,          }| j                            |g d"g d-          }| j                            |g d$g d.          } ||}|d&         j        d         }n||}|d'         j        d         }nr|)|}|d&         j        d         }|d(         j        d         }nG| )| }|d'         j        d         }|d(         j        d         }nt                              d)           d S |d/k    r| j                            |d(         d0gdg          }!| j                            |d(         d0gdg          }"|!|"|!d         j        |"d         j        k    rt                              d1           d S |"d         j        d         }ng }!g }"d2\  }#}$||k    r||k    s||k    r\||k    rV|d3         j        |d3         j        k    s|d         j        |d         j        k    rt                              d4           d S n||k    r||k    s||k    r|| k    r|d         j        |d         j        k    rt                              d5           d S | j                            |d         d6d7gddg          }#| j                            |d         g d8g d9          }$|#>|$<| j                            |#d         j        d                   |$d         j        dk    rt                              d:           d S nt                              d;           |                     |d         j        d         ||||j        d                   %%t                              d           d S |                     |g           |                     |d d                    |                     |d d                    |                     |d d                    |                     |
d d                    |                     |d d                    |                     |           |                     |           |                     |!d d                    |                     |"d d                    |#Ft          | j                            |#d                             dk    r|                     |#           |$|                     |$d d                    |                     | j                    | j        | j        %j        <   | j                            %           d<| _        d S )=Nr   >   r      rG   >   pospos_idpos_idsposition_idrl  zLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                 0    | j         j        d         k    S rq  )r3   r"   )r0  rj  s    r   r/  z-FusionRotaryEmbeddings.fuse.<locals>.<lambda>  s    DI1G1J$J r   r   )rO   rF   NegrS   r   r   )rO   rF   r~  rS   rS   )	rO   rF   r~  rS   rI   r   rJ   rK   r   )	rG   r   r   r   rG   r   r   r   r   )	rO   rF   r~  rS   rI   r   rJ   rK   rS   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)rO   rF   rS   r   )rG   r   rG   r   )rO   rF   rS   rS   )rO   rF   rS   rI   r   rJ   rK   r   )rG   r   rG   rN   r   r   r   r   )rO   rF   rS   rI   r   rJ   rK   rS   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfr   zCfuse_rotary_embeddings: failed to match common input in rotate_halfrO   r   rS   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr    r    )	rO   rI   rJ   Squeezer  rS   rI   rJ   rK   )	rG   rG   r   r   r   r   rN   r   r   )rO   rI   rJ   r  r  rS   rI   r   )rG   rG   r   r   r   r   rN   r   )rO   rI   rJ   rS   rI   rJ   rK   )rG   rG   r   r   rN   r   r   )rO   rI   rJ   rS   rI   r   )rG   rG   r   r   rN   r   r   r   rN   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr    )	r   rG   r   r   r   r   rN   r   r   )r   rG   r   r   r   r   rN   r   )r   rG   r   r   rN   r   r   )r   rG   r   r   rN   r   r    r   zGfuse_rotary_embeddings: failed to match position ids path in apply_roper   r   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacherJ   rK   )rJ   rK   r   rL   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r&  r   r   r!   r6   r7   rk  r   r   r1  r5  r   r6  
value_inforemoverW   r3   find_graph_inputrv  r"   add_nodes_to_removeget_childrenr?   r   r   r   r   )&r   r0  r   r   old_shape_inferrotate_half_x2_path_1_1rotate_half_x2_path_1_2rotate_half_x2_path_1rotate_half_x2_path_2_1rotate_half_x2_path_2_2rotate_half_x2_path_2rotate_half_x1_path_1_1rotate_half_x1_path_1_2rotate_half_x1_path_1rotate_half_x1_path_2_1rotate_half_x1_path_2_2rotate_half_x1_path_2x_path_1x_path_2x_pathsin_pathrL  rl  
sin_path_1
sin_path_2
sin_path_3
sin_path_4cos_pathrK  
cos_path_1
cos_path_2
cos_path_3
cos_path_4position_ids_from_sin_pathposition_ids_from_cos_pathpast_seq_len_pathcurr_seq_len_pathrj  s&                                        @r   r  zFusionRotaryEmbeddings.fuse  s   >--$,%2G2GF <5  4:f,,
1 F 1 1 klll"II$OOO&\]]]  ''--- #JJJJDJL\LbLmnn O ''1,,,,J"-44_Q5GHHHH( '+j&B&B>>>' '# '+j&B&B:::' '# %<$V?V!&*j&B&Beee+++' '# '+j&B&Baaa+++' '# %<$V?V!$,0E0MXYYY&*j&B&B777' '# '+j&B&B333' '# %<$V?V!&*j&B&B^^^(((' '# '+j&B&BZZZ(((' '# %<$V?V!$,0E0MXYYY &b).2G2K2PPP(,15J25N5SSS(,15J25N5SSS(,15J25N5SSSbccc z33$A H z33 A H )F~WXXX 1=-Hi55mmm+++ J
 55aaa((( J
 55WWW%%% J
 55KKK""" J
 %%$RL.q1		'%$RL.q1		'%$RL.q1	'{03'%$RL.q1	'{03]^^^ #+Hi55mmm+++ J
 55aaa((( J
 55WWW%%% J
 55KKK""" J
 %%$RL.q1		'%$RL.q1		'%$RL.q1	'{03'%$RL.q1	'{03]^^^ r!!-1Z-I-IQKKC. .*
 .2Z-I-IQKKC. .* /6191!49=WXY=Z=___LL!jkkkF9!<B1E-/*-/*3=00J&&8z+A+AJ&&8z+A+AB<$(999Xb\=NRZ[]R^Rc=c=cLL~   F	 >d
 j((X-C-CJ&&8z+A+AB<$(999LL!uvvvF$(J$@$@RLw'F% %! %)J$@$@RL444II% %! &-(0z223DR3H3Nq3QRRZ(,4CCLL!noooF D
 YZZZ"FF%b)03A O &\]]] $$dV,,,$$%:3B3%?@@@$$%:3B3%?@@@$$%:3B3%?@@@$$%:3B3%?@@@$$VCRC[111$$X...$$X...$$%?%DEEE$$%?%DEEE ,TZ5L5LM^_`Ma5b5b1c1cgh1h1h (():;;; ,(():3B3)?@@@dn---=A=Q$_%9:  111r   )r  r  r  r   r   r   r   rB  rk  r   rv  r  r!  r"  s   @r   r$  r$  T  s        `i ` ` ` ` ` `9     2H9 H H H HT66 6 	6
 6 6 6 6 6pA  A  A  A  A  A  A r   r$  )loggingtypingr   r   fusion_attentionr   fusion_baser   onnxr   r   r	   r
   r   
onnx_modelr   	getLoggerr  r6   r   r$  rQ  r   r   <module>r     s  
  " " " " " " " " , , , , , ,       L L L L L L L L L L L L L L            		8	$	$A  A  A  A  A O A  A  A H"d  d  d  d  d V d  d  d  d  d r   