
     NgU                         d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ  ee          Z G d de          Z G d	 d
e          ZdS )    )	getLoggerN)Fusion)FusionUtils)helper)	OnnxModelc                   D     e Zd ZdZdedef fdZd Zd Zd Z	d Z
 xZS )	FusionGptAttentionPastBasez3Base class for GPT Attention Fusion with past statemodel	num_headsc                     t                                          |dddgd           || _        t          |          | _        i | _        d | _        d S )N	AttentionLayerNormalizationSkipLayerNormalizationz	with past)super__init__r   r   utilscasted_attention_maskmask_filter_valueselfr
   r   	__class__s      i/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr   z#FusionGptAttentionPastBase.__init__   sU    .BD\-]_jkkk" ''
%'"!%    c                    | j                             |d|          }||j        dk    rt                              d           d S | j                             |d          dk    rt                              d           d S |j        d         }| j                             |d|          }|r|j        dk    r|}nF| j                             |ddgddg          }|t                              d           d S |d         }| j                             |d          dk    rt                              d	           d S |j        d         }	||	k    rt                              d
           d S |S )Nr   Gatherz,match_past_pattern_1: expect Gather for past   z9match_past_pattern_1: expect indices=1 for Gather of past	Transposez7match_past_pattern_1: failed match Transpose and Gatherz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r
   
get_parentop_typeloggerdebugfind_constant_inputinputmatch_parent_path)
r   concat_kconcat_voutput_name_to_nodegatherpastparentgather_past_kpast_k_nodespast_ks
             r   match_past_pattern_1z/FusionGptAttentionPastBase.match_past_pattern_1   sr   & &&x4GHH>V^x77LLGHHH4:))&!4499LLTUUU4|A&&x4GHH 	-fn00"MM:77;PXBY\]_`[abbL#VWWWt(,M:))-;;q@@LLVWWW4$Q'6>>LLGHHH4r   c                 v   | j                             |d|          }||j        dk    rt                              d           d S | j                             |d|          }||j        dk    rt                              d           d S | j                                         }|dk     rht          j        |ddg          st                              d           d S t          j        |d	d
d
g          st                              d           d S ns| j        	                    |d
dg          st                              d           d S | j        	                    |d
d
d
g          st                              d           d S t          j        |ddd          st                              d           d S |j
        d         }| j                             |ddgddg          }|t                              d           d S |d         j
        d         }	||	k    rt                              d           d S |S )Nr   Squeezez:match_past_pattern_2: expect Squeeze as parent of concat_vSplitz0match_past_pattern_2: expect Split for past path   axesz:match_past_pattern_2: axes != [0] for Squeeze in past pathsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathaxis)default_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr   z,match_past_pattern_2: expect past to be same)r
   r   r    r!   r"   get_opset_versionr   check_node_attributer   check_node_input_valuer$   r%   info)
r   r&   r'   r(   squeezer5   opset_versionr*   r-   r.   s
             r   match_past_pattern_2z/FusionGptAttentionPastBase.match_past_pattern_2K   s<   , *''!5HII?go::LLUVVV4
%%gq2EFF=EMW44LLKLLL4
446623GVaSII YZZZt3E7QFKK [\\\t :44Wa!EE YZZZt:44UA1vFF [\\\t/vqPQRRR 	LLfggg4{1~z33Hy'>RUVXYTZ[[LLRSSS4b!'*6>>KKFGGG4r   c                    | j                             |d|d          }|st                              d           d S | j                             |d|d          }|st                              d           d S |j        d         }|S )N	UnsqueezeF)	recursivezexpect unsqueeze for presentConcatzexpect concat for presentr   )r
   find_first_child_by_typer!   r;   output)r   r'   input_name_to_nodesunsqueeze_present_vconcat_presentpresents         r   match_presentz(FusionGptAttentionPastBase.match_present   s    "jAAk#6% B 
 
 # 	KK67774<<+>% = 
 
  	KK34444 '*r   c                    || j         v r| j         |         }ni| j                            |          r(| j                            |          \  }}|| j         |<   n'| j                            |          \  }}|| j         |<   |S N)r   r
   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32)r   
input_nameattention_mask_input_namecasted	cast_nodes        r   cast_attention_maskz.FusionGptAttentionPastBase.cast_attention_mask   s    333(,(B:(N%%Z((44 	O04
0T0TU_0`0`-F-5ND&z2237:3Q3QR\3]3]0%y5ND&z2((r   )__name__
__module____qualname____doc__r   intr   r/   r>   rI   rS   __classcell__r   s   @r   r	   r	      s        ==&i &C & & & & & &/ / /bA A AF  "	) 	) 	) 	) 	) 	) 	)r   r	   c                   8     e Zd ZdZdedef fdZd Zd Z xZ	S )FusionGptAttentionzP
    Fuse GPT-2 Attention with past state subgraph into one Attention node.
    r
   r   c                 L    t                                          ||           d S rK   )r   r   r   s      r   r   zFusionGptAttention.__init__   s#    	*****r   c
                    | j                             d          }
t          j        d|||||g|
dz   |g|
          }d|_        |j                            t          j        d| j                  t          j        d|	rdnd	          g           | j	        @|j                            t          j        d
t          | j	                            g           t          j        d|
dz   |j        d         g|
dz   g|
dz             }t          j        d|
dz   |j        d         g|g|
dz             }| j                            |||g           | j        | j        |j        <   | j        | j        |j        <   | j        | j        |j        <   d S )NGptAttentionr   _output)inputsoutputsnamezcom.microsoftr   unidirectionalr   r   r   MatMul_matmul_output_matmulAdd   _add)r
   create_node_namer   	make_nodedomain	attributeextendmake_attributer   r   floatr$   nodes_to_addthis_graph_namenode_name_to_graph_namerc   )r   	fc_weightfc_biasgemm_qkvr*   rH   r$   rD   maskis_unidirectionalattention_node_nameattention_nodematmul_nodeadd_nodes                 r   create_attention_nodez(FusionGptAttention.create_attention_node   s    #j99.II)9gtT:(94g>$	
 
 
 !0 ''%k4>BB%&6=N8UTUVV	
 	
 	
 !-$++V-BCVX]^b^tXuXu-v-v,wxxx&')3X^A5FG(+;;<$y0	
 
 
 #'*::HN1<MNH$v-	
 
 
 	  .+x!HIII<@<P$^%899=9M$[%566:6J$X]333r   c                 $   d }d }g }|j         dk    }d }|s$| j                            |g dg d||          }n#| j                            |g dg d||          }|d S d }	|s!|\  }
}}}}}}|
j        d|d         z
           }	n	|\  }}}}}}| j                            |g d	g d
          }|t                              d           d S |\  }}}}| j                            |g dg d|          }|!| j                            |g dg d|          }|| j                            |g dg d|          }|!| j                            |g dg d|          }|t                              d           d S |d         j        d         }| j                            |d                   \  }}|d         j        |         }n&|d         j        d         }|d         j        d         }|d         }|	%|	|j        vrt                              d           d S d}d }d }d }| j                            |g dg d          } | | \  }!}"}#}$}%| j                            |"g dg d          }&|&t                              d           d S |&d         }'|&d         }|$|'k    rt                              d           d S t          |&          dk    rB|&d         j         dk    r1| j                            |&d                   \  }}(|(dk    r|( | _        n| j        	                    |g d g d!fg d"g d#fg|          \  }} }| t                              d$           d S | d%         })| d&         }$| d         }%|dk    r| d         }*| j        	                    |*g d'g d(fg d)g d*fg d+g d,fg|          \  }}}|t                              d-           d S t          |          dk    rA|d         j         dk    r0| j                            |d                   \  }}(|(dk    r|(| _        | j        	                    |)g d.g d/fg d0g d1fg|          \  }}&}|&t                              d2           d S |&|dk    rdnd         }| j        
                    |&d         d|          }+|+j         d3k    r%|+}'|$|'k    rt                              d           d S n(|+j         d4k    r|+}nt                              d2           | j                            |j        d                   },t          |,t          j                  rGt          |,j                  d5k    r/|,j        d d         d6k    r|,j        d         |,j        d         k    st                              d7           d S t          j        |,t          j        |,                    rd8}nUt          j        |,t          j        t          j        |,                              st                              d9           d S | j                            |%g d:g d;          }-|-t                              d<           d S |-\  }.}/}0||0k    rt                              d=           d S | j                            |%g d	g d
          }1|1G| j                            |%g d>g d?          }1|1t                              d@           d S |1\  }}2}3}4}5n|1\  }2}3}4}5||5k    rt                              dA           d S |r"|2|k    rt                              dB           d S dC}6|(|d         j        d         }7|                     |7          }6|                     |2||          p|                     |2||          }|t                              dD           d S | j                            |          st                              dE           |                     ||          }|t                              dF           d S | j                            |          st                              dG           d S |                     ||||||j        d         |j        d         |6|	  	         d| _        d S )HNr   )rh   ReshapeGemmr   r   r   re   )r   Nr   r   r   r   r   )r(   return_indice)r   r   r   r   r   re   )Nr   r   r   r   r   r   r   )rB   r   r   r2   )r   r   r   r   z&fuse_attention: failed to match v path)r   r   r   r   )r   r   r   r   )r   r   r   r   )rh   re   r   )r   Nr   )rh   re   r   z'fuse_attention: failed to match fc pathri   r   zCUpstream Add and (Skip)LayerNormalization shall have one same inputT)SoftmaxSubMulDivre   )r   r   r   r   r   )
r   r   Slicer   r@   r   r1   r   Shaper   )
r   r   r   r   r   r   r   r   r   r   z8fuse_attention: failed to match unidirectional mask path   z-fuse_attention: skip since div_qk != div_maskr   i)r   Wherer   re   )r   r   r   r   )r   rh   r   r   re   )r   r   Nr   r   z(fuse_attention: failed to match qk nodes)r   r   Castr@   r@   r   )Nr   r   r   r   r   )r   r   r@   r@   r   )Nr   r   r   r   )r   r   r@   r@   )Nr   r   r   z9fuse_attention: failed to match input attention mask path)r   r   r   r@   r   r1   r   r   )r   r   r   r   r   r   r   r   )r   r   r@   r   r1   r   r   )r   r   r   r   r   r   r   z)fuse_attention: failed to match mask pathr   rB      )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor ones)r   r   r2   )r   r   r   z&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_q)r   rB   r   r   r2   )r   r   r   r   r   z&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_match z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r    r
   r%   r$   r!   r"   get_constant_inputlenr   match_parent_pathsr   get_constant_value
isinstancenpndarrayshapeallclose	ones_liketrilrS   r/   r>   r;   rL   rI   find_graph_outputr~   rD   prune_graph)8r   normalize_noderE   r(   r*   rH   r   is_normalize_node_skiplayernorm	qkv_nodesanother_inputadd_qkvreshape_qkvrw   	reshape_1	reshape_2transpose_qkv
matmul_qkvv_nodesr'   transpose_v	reshape_vsplit_fcfc_nodesru   i_rv   layernorm_before_attentionry   
slice_maskinput_mask_nodesconcat_k_to_matchqk_nodes
softmax_qksub_qkmul_qkdiv_qk	matmul_qk
mask_nodesdiv_maskmul_valwhere_qkadd_qkdiv_or_concat	mask_dataq_nodestranspose_q	reshape_qsplit_qk_nodesr&   transpose_k	reshape_ksplit_krP   rO   s8                                                           r   fusezFusionGptAttention.fuse   s
   *8*@D\*\'	. 	
44WWW((($7+ 5  II 
44PPP%%%$7+ 5  I F. 	  $M!mA.>*>?MM  *..z;f;f;fhththtuu?LLABBBF7>4;	8 ://@@@LL	
 
 z33HHH#	 H z33777#	 H :77??? LL'	  FGGG )!,I:00!==DAqqk'*GG )!,Iqk'*G%-b\" $>X>^)^)^LL^___F 
 ://
<f<f<fhwhwhwxx>F;Z55   /.. J  !WXXX!"~H#AJ!!LMMM:""z!}'<'E'E!Z:::a=II
7f$$.5XD* "Z:::::LLLIAAACUCUCUV $ NAx GHHH|Hb\F IAvv!!)-)F)F XWW111
 POO...
 EDD+OO (!* *&#Q$ $+LL!\]]]F'((1,,1A!1D1LPU1U1U!%!>!>?OPQ?R!S!SJAw&((18.#z<< dcc000 \[[---
 $   Az1 !HIII#aAAQ7J J11*R.!EXYYM$--(X%%LL!PQQQF & &(22$1!!HIII J11*2B12EFF	y"*--	IO$$))#v--"ioa&888LLOPPPF;y",y"9"9:: 	 %YY0G0G(H(HII 	LL_```F*..y:[:[:[]f]f]fgg?LLABBBF,3)iwLLIJJJF*..y:e:e:egsgsgstt?j22HHH G
 EFFFAH>Hk9gg:A7X{IwwLLIJJJF 	->!>!>LLSTTTF$&!')"-3A6J(,(@(@(L(L% ((8=PQQ 
UYUnUnh 3V
 V
 <KKCDDDFz**400 	5LL3444 $$X/BCC?KKFGGGFz++G44 	KK;<<<F""&-a0q!%
	
 
	
 
	
  r   )
rT   rU   rV   rW   r   rX   r   r~   r   rY   rZ   s   @r   r\   r\      s~         +i +C + + + + + +.K .K .K`             r   r\   )loggingr   numpyr   fusion_baser   fusion_utilsr   onnxr   
onnx_modelr   rT   r!   r	   r\    r   r   <module>r      s   
                 $ $ $ $ $ $                  	8		X) X) X) X) X) X) X) X)vw  w  w  w  w 3 w  w  w  w  w r   