
     Ngw                     @   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ  ee          Z G d d          Z G d d          Z G d d          Z G d d          Z  G d d          Z! G d d          Z" G d d          Z# G d d          Z$ G d de
          Z% G d de          Z& G d d e%          Z' G d! d"e%          Z( G d# d$e%          Z) G d% d&e%          Z* G d' d(e          Z+dS ))    )	getLogger)ListOptionalN)DynamoOnnxHelper)Fusion)AttentionOpTypeFusionOptions) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)NumpyHelper)
ModelProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       e Zd Zd ZdS )ProcessGemmWFuncc                 ,    t          j        |d          S )N   r   )np	transposeselfxs     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_phi.py__call__zProcessGemmWFunc.__call__   s    |Av&&&    N__name__
__module____qualname__r    r   r   r   r      s#        ' ' ' ' 'r   r   c                       e Zd Zd ZdS )ProcessMatMulQFuncc                 `    t          j        t          j        |dd          d         d          S )N   r   r   r   r   splitr   s     r   r   zProcessMatMulQFunc.__call__   '    |BHQ1--a0&999r   Nr    r$   r   r   r&   r&      #        : : : : :r   r&   c                       e Zd Zd ZdS )ProcessMatMulKFuncc                 `    t          j        t          j        |dd          d         d          S )Nr(   r   r   r   r)   r   s     r   r   zProcessMatMulKFunc.__call__    r+   r   Nr    r$   r   r   r.   r.      r,   r   r.   c                       e Zd Zd ZdS )ProcessMatMulVFuncc                 `    t          j        t          j        |dd          d         d          S )Nr(   r      r   r)   r   s     r   r   zProcessMatMulVFunc.__call__%   r+   r   Nr    r$   r   r   r1   r1   $   r,   r   r1   c                       e Zd Zd ZdS )ProcessBiasQFuncc                 >    t          j        |dd          d         }|S )Nr(   r   r   r*   r   s     r   r   zProcessBiasQFunc.__call__*       HQ2q!r   Nr    r$   r   r   r5   r5   )   #            r   r5   c                       e Zd Zd ZdS )ProcessBiasKFuncc                 >    t          j        |dd          d         }|S )Nr(   r7   r   r8   r   s     r   r   zProcessBiasKFunc.__call__0   r9   r   Nr    r$   r   r   r<   r<   /   r:   r   r<   c                       e Zd Zd ZdS )ProcessBiasVFuncc                 >    t          j        |dd          d         }|S )Nr(   r7   r3   r8   r   s     r   r   zProcessBiasVFunc.__call__6   r9   r   Nr    r$   r   r   r?   r?   5   r:   r   r?   c                       e Zd Zd ZdS )ProcessRotCacheFuncc                 x    t          |j                  dk    sJ |j        d         dk    r|d d ddf         S |S )Nr3   r       r      )lenshaper   s     r   r   zProcessRotCacheFunc.__call__<   sG    17||q    71:QQQ"W:r   Nr    r$   r   r   rB   rB   ;   s#            r   rB   c                   D    e Zd Zdedee         f fdZdefdZd Z	d Z
d Zd	 Zd#dZd Zd Zd Zdee         dedee         fdZd$dee         dee         defdZd$dee         dee         defdZd%dee         dee         defdZd$dee         dee         defdZd$dee         dee         defdZd&dee         dee         defdZd&dee         dee         defdZd&dee         dee         defdZ	 	 	 	 d'dee         dee         defd"Z xZS )(Fissionmodelnodes_to_findc                 N    t                                          |d|           d S )NDONOTUSEsuper__init__)r   rJ   rK   	__class__s      r   rP   zFission.__init__F   s'    
 	
M:::::r   attn_op_typec                     || _         d S N)rR   )r   rR   s     r   set_attention_op_typezFission.set_attention_op_typeM   s    (r   c                 ,    |dz   t          |          z   S )N_)str)r   layer_idnames      r   	get_unamezFission.get_unameP   s    czCMM))r   c                     |D ]6}||k    s*|                     |          s|                    |          r|c S 7t          d| d          )NzEdge z
 not found)endswith
startswith
ValueError)r   edgesrZ   edges       r   get_edge_by_namezFission.get_edge_by_nameS   sb     	 	Dt||t}}T22|dood6K6K| 1111222r   c                 8    |                      |j        |          S rT   )rb   inputr   noderZ   s      r   get_input_by_namezFission.get_input_by_nameY   s    $$TZ666r   c                 8    |                      |j        |          S rT   )rb   outputre   s      r   get_output_by_namezFission.get_output_by_name\   s    $$T[$777r   Nc                 h   | j                             |          }t          j        |          } ||          }t	          j        ||dz   n|t          j        |j        |	                                
                                d          }| j                             || j                   |j        S )N
_processedT	data_typedimsvalsraw)rJ   get_initializerr   to_arrayr   make_tensorr   FLOATrG   flattentobytesadd_initializerthis_graph_namerZ   )r   initializer_namefunctorcustom_namei
i_np_arrayprocessed_i_np_array
new_tensors           r   process_initializerzFission.process_initializer_   s    J&&'788 )!,,
&wz22'/:/B|++!'%+%--//7799
 
 

 	
"":t/CDDDr   c                     | j                                         j                                        }||_        t
          j        |j        j        _	        d S rT   )
rJ   graph
value_infoaddrZ   r   ru   typetensor_type	elem_typer   rZ   new_value_infos      r   add_fp32_value_infozFission.add_fp32_value_infom   C    ))++6::<<"4?4E'111r   c                     | j                                         j                                        }||_        t
          j        |j        j        _	        d S rT   )
rJ   r   r   r   rZ   r   INT64r   r   r   r   s      r   add_int64_value_infozFission.add_int64_value_infor   r   r   c                 j   | j                                         j        D ]@}|j        |k    r3| j                                         j                            |            nAt          j        |t          j        |          }| j                                         j        	                    |g           d S )Nr   rG   )
rJ   r   r   rZ   remover   make_tensor_value_infor   ru   extend)r   rZ   rG   r   r   s        r   replace_fp32_value_infozFission.replace_fp32_value_infow   s    ***,,7 	 	J$&&
  ""-44Z@@@ '  6!'
 
 

 	
%,,n-=>>>>>r   subgraph_nodesrY   layer_known_edges_namesc                 <   |D ]}t          |j                  D ]N\  }}|dk    r||vr>|                     ||          |j        |<   |                     |j        |                    Ot          |j                  D ]N\  }}|dk    r||vr>|                     ||          |j        |<   |                     |j        |                    O|                     ||j                  |_        | j                            |           | j        | j	        |j        <   d S )N )
	enumeraterd   r[   r   ri   rZ   nodes_to_addappendry   node_name_to_graph_name)r   r   rY   r   new_noder}   rZ   s          r   set_unique_name_and_add_nodesz%Fission.set_unique_name_and_add_nodes   s>    ' 	O 	OH$X^44 @ @42::!888(,x(F(FHN1%,,X^A->???$X_55 A A42::!888)-$)G)GHOA&,,X_Q-?@@@ NN8X]CCHM$$X...:>:ND(77	O 	Or   r   inputsoutputsprefixc                     t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d          }|gS )Nr(   r   LayerNormalization_LayerNormalizationg   >)r   r   rZ   epsilonrF   r   	make_noder   r   r   r   rf   s        r   	layernormzFission.layernorm   se    6{{a7||q     //)
 
 
 vr   c                    t          |          dk    sJ t          |          dk    sJ t          j        d|d         |d         g|dz   g|dz             }t          j        d|dz   |d         g||d	z             }||gS )
Nr(   r   MatMulr   
matmul_outr   r   rZ   Addr3   Biasr   )r   r   r   r   matmulr   s         r   gemmzFission.gemm   s    6{{a7||q    !1Ivay)l*+("	
 
 
 \)6!95&	
 
 
 }r   rD   c           	          t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d||          }|gS )N   r   RotaryEmbeddingcom.microsoft)r   r   rZ   domainrotary_embedding_dim	num_headsr   )r   r   r   r   rot_dimr   rf   s          r   rotaryzFission.rotary   sk    6{{a7||q    ++"!(
 
 
 vr   c                     t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d          }|gS )Nr   FastGelur   )r   r   rZ   r   r   r   s        r   fastgeluzFission.fastgelu   sd    6{{a7||q    *$"
 
 
 vr   c                     t          |          dk    sJ t          |          dk    sJ t          j        d|||dz             }|gS )Nr3   r   r   r   r   r   s        r   r   zFission.add   s`    6{{a7||q    %	
 
 
 vr   c           	          t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d|d          }|gS )N   r(   MultiHeadAttentionr   r   )r   r   rZ   r   r   unidirectionalr   r   r   r   r   r   rf   s         r   mhazFission.mha   sk    6{{a7||q     .."
 
 
 vr   c           	          t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d||          }|gS )N   r(   GroupQueryAttentionr   )r   r   rZ   r   r   kv_num_headsr   r   s         r   gqazFission.gqa   sk    6{{a7||q    !//""
 
 
 vr   c                     t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d|ddd	  	        }|gS )N   r3   	Attentionr   r   rD   )r   r   rZ   r   r   r   	do_rotaryr   r   r   s         r   	attentionzFission.attention   sp    6{{a7||q    +%"!#

 

 

 vr   P      %?c                     t          |          dk    sJ t          |          dk    sJ t          j        d|||dz   d||||	  	        }|gS )N   r   PagedAttentionzvllm.ort.ext)r   r   rZ   r   r   num_kv_heads	head_sizescaler   )r   r   r   r   r   r   r   rf   s           r   
paged_attnzFission.paged_attn  ss     6{{a7||q    **!"

 

 

 vr   rT   )r   )r   rD   rD   )r   rD   )r   rD   r   r   )r!   r"   r#   r   r   rX   rP   r   rU   r[   rb   rg   rj   r   r   r   r   r   intr   r   r   r   r   r   r   r   r   r   __classcell__rQ   s   @r   rI   rI   E   s)       ;; Cy; ; ; ; ; ;)/ ) ) ) )* * *3 3 37 7 78 8 8   F F F
F F F

? 
? 
?O"9oO9<OW[\_W`O O O O(
 
S	 
DI 
s 
 
 
 
 49 tCy #    " T#Y c C    
 
tCy 
49 
c 
 
 
 
	 	$s) 	d3i 	 	 	 	 	 $s) d3i      $s) d3i      S	 DI s    ( ! S	 c 	       r   rI   c                   V     e Zd Zdededef fdZdefdZd Zde	fd	Z
de	fd
Z xZS )Phi2PreProcessorrJ   r   hidden_sizec                     t                                          |           d| _        || _        || _        d| _        d S )NrD   modeling_phi_PhiModel_model_1)rO   rP   num_hidden_layersnum_attention_headsr   	func_namer   rJ   r   r   rQ   s       r   rP   zPhi2PreProcessor.__init__  s>    !##, &8r   returnc                 D   i }d|d<   d|d<   d|d<   d|d<   t          d	| j        d	          D ]0}d
| |d| <   d| |d| <   d| |d| d<   d| |d| d<   1d | j        j        j        D             }d|v rd|v rd|d<   d|d<   nd|v rd|v sJ d|d<   d|d<   |S )Nlogits	lm_head_1	input_idsl_input_ids_
past_key_0
key_statespast_value_0value_statesr   	past_key_key_states_past_value_value_states_present_key_model_layers__1present_value__1_1c                     g | ]	}|j         
S r$   rZ   ).0os     r   
<listcomp>z7Phi2PreProcessor.get_phi2_edge_dict.<locals>.<listcomp>3  s    ;;;a16;;;r   model_layers_0_1_1model_layers_0_1_2present_key_0present_value_0model_layers_0_1)ranger   rJ   r   ri   )r   	edge_dictr}   r   s       r   get_phi2_edge_dictz#Phi2PreProcessor.get_phi2_edge_dict'  sD   	!)	+$/	.!".	,$2	.!q$0!44 	F 	FA+:q??I'A''(->1->->I)a))*/Aa/A/AI+a+++,1E!1E1EI-a---..;;4:#3#:;;;7**/Cw/N/N.=I*+.?I*++%005IW5T5T5T5T,;I().?I*+r   c                     d}| j         j        j        D ]6}|j                            |          }|dk    r|j        |d          |_        7d S )N)modeling_phi_PhiDecoderLayer_model_layersr7   )rJ   r   rf   op_typefind)r   phi2_transformer_layer_namerf   indexs       r   simplify_phi2_op_typez&Phi2PreProcessor.simplify_phi2_op_type=  s[    &Q#J$) 	4 	4DL%%&ABBE{{#|EFF3	4 	4r   rR   c                    |t           j        k    | _        |t           j        k    | _        | j        j        }g }|j        D ]}d|j        v rt          j
        |j        | j        st          j        nt          j        ddg          }t          j
        dt          j        dg          }t          j
        dt          j        ddg          }t          j
        dt          j        ddg          }t          j
        d	t          j        dg          }	| j        s|                    |||g          n|                    |||	g           | j        rwd
|j        v rlt          j
        |j                            d
d          |j        j        j        dd| j        d| j        | j        z  g          }
|                    |
g           | j        rd
|j        v rCt          j
        |j        |j        j        j        g d          }
|                    |
g           d|j        v rCt          j
        |j        |j        j        j        g d          }
|                    |
g           'd
|j        v s	d|j        v rWt          j
        |j        |j        j        j        d| j        d| j        | j        z  g          }
|                    |
g           |                    d           |j                            |           g }t-          |j                  D ]\  }}|dk    r|                    |g           "| j        rvd|j        v rlt          j
        |j                            dd          |j        j        j        dd| j        d| j        | j        z  g          }
|                    |
g           | j        rt          j
        |j        |j        j        j        d| j        d| j        | j        z  g          }
|                    |
g           |                    d           |j                            |           d S )Nr   
batch_sizeseq_lenr   stepr   position_idsattention_maskinput_metadatapast_keypastr3   past_seq_len)
num_blocksr   head_size_x
block_sizeblock_x
past_value)r  r   r   r  rd   r   present_keypresenttotal_seq_lenri   )r   r   use_attnr   use_vllmrJ   r   rd   rZ   r   r   r   INT32r   r   replacer   r   r   r   r   
ClearFieldr   ri   )r   rR   r   
new_inputsvivi_iidvi_stepvi_pidvi_maskvi_metavi_cachenew_outputsr}   s                r   process_graph_ioz!Phi2PreProcessor.process_graph_ioD  s   $(AA$(FF
 
+ N	2 N	2Bbg%%6G7;}[k//+J['3  
 !7)/#  
  6")/'3  
 !7$)/'3  
 !7$)/#    =FJ%%vw&@AAA#**FFG+DEE} .2((%<
F;;"$'"5"?( 4* ,0HH
  
  
 H %%xj111  2((%<"$'"5"?aaa     H
 %%xj11127**%<"$'"5"?  	  	  	 H %%xj111((LBG,C,C%<"$'"5"?( 4* ,0HH		  	  	 H %%xj111!!!:&&&u|,, 	3 	3EArAvv""B4((((= 3$//#)#@GOOM9EE&(g&9&C ! , $ 8 / $ 0D4L L#
$ 
$ 
$ $**H:666] 3%<"$'"5"?( 4+ ,0HH		  	  	 H  &&z2222"""K(((((r   c                    d }| j         j        D ]*}|j                            | j                  r	|j        } n+|J |                     |           |                     |                                            |                                  | 	                                 |t          j        k    r|                                  |                     |           d S rT   )rJ   	functionsrZ   r]   r   unroll_functionupdate_edgesr  r  remove_dropout_layerr   r   remove_lm_head_layerr,  )r   rR   function_namefuncs       r   preprocess_onnxz Phi2PreProcessor.preprocess_onnx  s    J( 	 	Dy!!$.11  $	 (((]+++$1133444""$$$!!###?999%%'''l+++++r   )r!   r"   r#   r   r   rP   dictr  r  r   r,  r5  r   r   s   @r   r   r     s        9j 9S 9s 9 9 9 9 9 9D    ,4 4 4{)_ {) {) {) {)z,O , , , , , , , ,r   r   c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerEmbeddingPhirJ   c                 N    t                                          |dg           d S )N6torch_nn_modules_sparse_Embedding_model_embed_tokens_1rN   r   rJ   rQ   s     r   rP   z'FissionTransformerEmbeddingPhi.__init__  s)     	!Y Z[[[[[r   c                    t                               d|j                   t          |j                  dk    sJ t          |j                  dk    sJ |j        d         }|j        d         }|                     |d          }|||g}t          j        d||g|gd          g}| 	                    |d|           | j
                            |           d	| _        d S )
NOptimizing %s...r3   r   r   zembed_tokens.weightGatherEmbedding_Gatherr   T)loggerinforZ   rF   rd   ri   rg   r   r   r   nodes_to_remover   prune_graph)	r   rf   input_name_to_nodesoutput_name_to_noderd   ri   	embeddingr   r   s	            r   fusez#FissionTransformerEmbeddingPhi.fuse  s    &	2224:!####4;1$$$$
1Q**41FGG	#(&)"< !5)'	  
 	**>1>UVVV##D)))r   r!   r"   r#   r   rP   rG  r   r   s   @r   r8  r8    s[        \\ \ \ \ \ \             r   r8  c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerLayerNormPhirJ   c                 N    t                                          |dg           d S )N@torch_nn_modules_normalization_LayerNorm_model_final_layernorm_1rN   r;  s     r   rP   z'FissionTransformerLayerNormPhi.__init__  s)     	!c deeeeer   c                 v   t                               d|j                   t          |j                  dk    sJ t          |j                  dk    sJ |j        d         }|j        d         }|                     |d          }|                     |d          }||||g}g }	|	                    |                     |||g|gd                     | 	                    |	d|           | 
                    |g d	           | 
                    |g d	           | j                            |           d
| _        d S )Nr=  r(   r   r   zfinal_layernorm.weightzfinal_layernorm.biasFinalc   r  r  r   T)r@  rA  rZ   rF   rd   ri   rg   r   r   r   r   rB  r   rC  )
r   rf   rD  rE  rd   ri   	ln_weightln_biasr   r   s
             r   rG  z#FissionTransformerLayerNormPhi.fuse  sI   &	2224:!####4;1$$$$
1Q**41IJJ	((/EFF#(&)W"EdnneY-H6(T[\\]]]**>2?VWWW$$U,T,T,TUUU$$V-U-U-UVVV##D)))r   rH  r   s   @r   rJ  rJ    s[        ff f f f f f             r   rJ  c                   *     e Zd Zdef fdZd Z xZS )!FissionTransformerCausalLMHeadPhirJ   c                 N    t                                          |dg           d S )N(torch_nn_modules_linear_Linear_lm_head_1rN   r;  s     r   rP   z*FissionTransformerCausalLMHeadPhi.__init__  s)     	!K LMMMMMr   c                    t                               d|j                   t          |j                  dk    sJ t          |j                  dk    sJ |j        d         }|j        d         }|                     |                     |d          t                                }|                     |d          }||||g}g }	|		                    | 
                    |||g|gd                     |                     |	d	|           |                     |g d
           |                     |g d           | j                            |           d| _        d S )Nr=  r   r   r3   r   zlm_head.weightzlm_head.biasLMHead_rO  rP  )r  r  i   T)r@  rA  rZ   rF   rd   ri   r   rg   r   r   r   r   r   rB  r   rC  )
r   rf   rD  rE  rd   ri   	fc_weightfc_biasr   r   s
             r   rG  z&FissionTransformerCausalLMHeadPhi.fuse  s^   &	2224:!####4;1$$$$
1Q,,T-C-CDJZ-[-[]m]o]opp	((~>>#(&)W"Edii	7(CfXyYYZZZ**>2?VWWW$$U,T,T,TUUU$$V-M-M-MNNN##D)))r   rH  r   s   @r   rT  rT    s[        NN N N N N N             r   rT  c                   @     e Zd Zdedef fdZd Zd Zd Zd Z	 xZ
S )FissionTransformerBlockPhirJ   r   c                     || _         d}i | _        g }t          |          D ]'}d| d}|                    |           || j        |<   (t	                                          ||           d S )NrD   *modeling_phi_PhiDecoderLayer_model_layers_r   )r   func_to_layer_idr  r   rO   rP   )r   rJ   r   max_num_layersrK   layerr   rQ   s          r   rP   z#FissionTransformerBlockPhi.__init__6  s    
 # ">** 	5 	5ENUNNNI  +++/4D!),,.....r   c                 &    | j         |j                 S rT   )r_  r  )r   rf   s     r   get_layer_idz'FissionTransformerBlockPhi.get_layer_idF  s    $T\22r   c                    t          j        ddgdgdt          j                  t          j        dddgdgd	
          t          j        dddgdgd
          t          j        ddgdgdt          j                  t          j        ddgdgd
          t          j        dddgdgdd          t          j        ddgdgdt          j                  g}|S )NCastr  
mask_int64Cast_gqa_aux_0)r   r   rZ   to	ReduceSumonemask_row_sumsReduceSum_gqa_auxr   Subseqlens_k_int64Sub_gqa_aux	seqlens_kCast_gqa_aux_1Shape
mask_shapeShape_gqa_aux_0r>  total_seq_len_int64Gather_gqa_aux_0r   )r   r   rZ   axistotal_sequence_lengthCast_gqa_aux_2)r   r   r   r   r   )r   gqa_aux_nodess     r   get_gqa_aux_nodesz,FissionTransformerBlockPhi.get_gqa_aux_nodesI  s<   ()%%$   $e,()(	   '/*+"	   )*$%$   Wl^l^Zklll$e,./'   -.01%$  G*
V r   c	                 8   | j                             |          }	| j                             |          }
| j                             |          }t          j        t	          j        |	          d          }t          j        t	          j        |
          d          }t          j        t	          j        |          d          }t          j        |||fd          }| j                             |          }| j                             |          }| j                             |          }t	          j        |          }t	          j        |          }t	          j        |          }t          j        |||fd          }|j        d         }t          j	        |t          j        ||dz  g|                                                                d          }| j                             || j                   t          j	        |t          j        |dz  g|                                                                d          }| j                             || j                   |                     |j                   |                     |j                   ||fS )Nr   r   )rw  r   r(   Trm   )rJ   rr   r   r   r   rs   stackrG   r   rt   r   ru   rv   rw   rx   ry   r   rZ   )r   q_wk_wv_wq_bk_bv_bweight_name	bias_nameq_weightk_weightv_weightqwkwvw
qkv_weightq_biask_biasv_biasqbkbvbqkv_biasr   weightbiass                             r   pack_qkv_gemmz(FissionTransformerBlockPhi.pack_qkv_gemmw  sH   :--c22:--c22:--c22\+.x88&AA\+.x88&AA\+.x88&AAXr2rl333
++C00++C00++C00!&))!&))!&))8RRLq111 &q)#!'{Q/##%%--//
 
 
 	
""64+?@@@!!'/"!!##++--
 
 
 	
""4)=>>>  ---  +++I%%r   c                    t                               d|j                   t                               d| j                    |                     |          }|j        d         }|                     |d          }|                     |d          }|j        d         }|                     |d          }	|                     |d          }
|                     |d	          }|                     |d
          }d\  }}}}}}d\  }}d\  }}| j        t          j
        k    rQ|                     |                     |d          t                                }|                     |                     |d          t                                }|                     |                     |d          t                                }|                     |d          }|                     |d          }|                     |d          }|                     |                     |d          t                                }|                     |                     |d          t                                }n|                     |                     |d          |                     |d          |                     |d          |                     |d          |                     |d          |                     |d          |                     |d          |                     |d                    \  }}|                     |                     |d          t                                }|                     |d          }|                     |                     |d          t                                }|                     |                     |d          t                                }|                     |d          }|                     |d          }g }|                    |||g           |                    ||	|
g           |                    ||g           | j        t          j
        k    r|                    ||||||||g           n|                    ||g           |                    ||||||g           |                    g d           g }|                    |                     |||gdg                     |                    |                     d||gd gd!                     |                    |                     d||gd"gd#                     |                    |                     d"gd$g                     |                    |                     d$||gd%gd&                     |                    |                     d d%gd'gd(                     |                    |                     |d'g|gd)                     | j        t          j
        k    rq|                    |                     d||gd*gd+                     |                    |                     d||gd,gd-                     |                    |                     d||gd.gd/                     | j        t          j        k    rd0nd1}|                    |                     d*|||gd2gd+                     |                    |                     d,|||gd3gd-                     | j        t          j        k    r6|                    |                     d2d3d.d4d5d4||gd|	|
g                     nv| j        t          j        k    r|                    |                     d2d3d.||d6d7gd|	|
g                     |dk    r|                                 } | D ]0}!| j                            |!           | j        | j        |!j        <   1| j         !                    tE          j#        tI          j%        d8gd9:          d;<          | j                   n| j        t          j        k    r0|                    | &                    d2d3d.||d=gdg                     nQd>| }"d?| }#|                    |"|#g           |                    | '                    d||d5|"gd|#g                     | (                    |||           | )                    |g d@           | )                    |g d@           | j*                            |           dA| _+        d S )BNr=  zAttentionOpType: r   r  r  r7   r  present_valuezinput_layernorm.weightzinput_layernorm.bias)NNNNNN)NNzself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedattn_qkv_weightattn_qkv_biaszself_attn.dense.weightzself_attn.dense.biaszmlp.fc1.weightzmlp.fc2.weightzmlp.fc1.biaszmlp.fc2.bias)r  r  rp  rx  r  r  ln_outattn_outattn_add_outOutProj_fc1_outFC1_gelu_outfc2_outFC2_residual_1_out
Residual_1
Residual_2queryQ_keyK_valueV_r  r  	query_rotkey_rotr   r  rp  rx  r   int64)dtyperj  r   r  past_present_rP  T),r@  rA  rZ   rR   rc  rd   rg   ri   rj   r   r   r   r   rB   r  r[   r   r   r   r   r   r   r   r   r   r   r   r{  r   r   ry   r   rJ   rx   r   
from_arrayr   arrayr   r   r   r   rB  rC  )$r   rf   rD  rE  rY   i_hidden_statesi_key_cachei_value_cacheo_hidden_stateso_key_cacheo_value_cacherQ  rR  attn_q_weightattn_q_biasattn_k_weightattn_k_biasattn_v_weightattn_v_biasr  r  	cos_cache	sin_cacheattn_out_weightattn_out_biasmlp_fc1_weightmlp_fc2_weightmlp_fc1_biasmlp_fc2_biasr   r   pos_ids_namerz  r   	past_namepresent_names$                                       r   rG  zFissionTransformerBlockPhi.fuse  s	    	&	222;(9;;<<<$$T***Q-,,T:>>..t\BB+b/--dMBB//oFF**41IJJ	((/EFF^
Z{M;{ *4&)	9 999 44&&t-FGGIYI[I[ M !44&&t-FGGIYI[I[ M !44&&t-FGGIYI[I[ M 007NOOK007NOOK007NOOK00&&t-DEEGZG\G\ I 00&&t-DEEGZG\G\ II .2-?-?&&t-FGG&&t-FGG&&t-FGG&&t-DEE&&t-DEE&&t-DEEx):;;x99	. 	.*O] 22""4)ABBDTDVDV
 
 ..t5KLL11$2H2HO_2`2`brbtbtuu11$2H2HO_2`2`brbtbtuu--dNCC--dNCC"$&&m'TUUU&&m'TUUU&&	7';<<< 999#**!!!	    $**O]+KLLL&&m^\>[gh	
 	
 	
 	 &&nnn	
 	
 	
 dnnoy'-RU]T^__```dii_m(TWeVfhrsstttdii><(PS\R]_effgggdmmYK*FFGGGdii^\(RU^T_aghhiiidhh	'BEUDVXdeefffdhh9I'J_L]_kllmmm 999!!$))X}k,RU\T]_c"d"deee!!$))X}k,RUZT[]a"b"bccc!!$))X}k,RU\T]_c"d"deee-1->/B`-`-`>>flL!!$++wiQZ.[^i]jlp"q"qrrr!!$++ulIy.Y\e[fhl"m"mnnn O$FFF%%HH$i">NPRT_ano#[-@     "o&III%%HH'%#')'3 $[-@    q==$($:$:$<$<M$1 [ [)00:::FJFZ4X]CCJ..$/!G0L0L0LSXYYY[_[o   "o&DDD%%OO$i+}Vfg#    +**I0h00L#**I|+DEEE!!?OQZ[^hjv]w    	**>8E\]]]$$_6^6^6^___$$_6^6^6^___##D)))r   )r!   r"   r#   r   r   rP   rc  r{  r  rG  r   r   s   @r   r\  r\  5  s        // / / / / / / 3 3 3, , ,\(& (& (&Tk  k  k  k  k  k  k r   r\  c                   \     e Zd Zdededef fdZddee         def fd	Z	d
 Z
ddZ xZS )PhiOnnxModelrJ   r   r   c                 "   t                                          |           t          | j        ||          | _        t          | |          | _        t          |           | _        t          |           | _
        t          |           | _        d S rT   )rO   rP   r   rJ   phi2_preprocessorr\  fission_transformer_blockrT  fission_causal_lm_headrJ  fission_transformer_layernormr8  fission_transformer_embeddingr   s       r   rP   zPhiOnnxModel.__init__P  sy    !1$*i!U!U)CD))T)T&&G&M&M#-KD-Q-Q*-KD-Q-Q***r   NFoptionsadd_dynamic_axesc                 B   |J |j         }| j                            |           | j                            |           | j                                         | j                                         | j                                         | j                                         t                      
                                 t          |           | _        t          |           | _        | j                                         | j                                         d S rT   )attention_op_typer  rU   r  r5  applyr  r  r  rO   rC  r   fuse_slnr
   fuse_bias_sln)r   r  r  rR   rQ   s       r   optimizezPhiOnnxModel.optimizeX  s    """0&<<\JJJ..|<<<&,,...*00222#))+++*00222 5T::=dCC  """""r   c                     i }g d}|D ])}|                      |          }t          |          ||<   *t                              d|            |S )z8
        Returns node count of fused operators.
        )	r   r   r   r   GeluBiasGelur   r   SkipLayerNormalizationzOptimized operators: )get_nodes_by_op_typerF   r@  rA  )r   op_countopsopnodess        r   get_fused_operator_statisticsz*PhiOnnxModel.get_fused_operator_statisticsm  so     

 

 

  	& 	&B--b11Eu::HRLL6H66777r   c                    |                                  dt          ffd} |d           |d          z    |d          z    |d          z   } |d           |d	          z    |d
          z   } |d           |d          z   }|dk    o||k    o||k    }|dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |S )zA
        Returns True when the model is fully optimized.
        Nop_namec                 2                         |           pdS )Nr   )get)r  fused_op_counts    r   r  z1PhiOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g..3!3r   r   r   r   r   r  r  r   r   r  r   zLayer Normalization not fusedzGelu (or FastGelu) not fusedz+Attention (or MultiHeadAttention) not fused)r  rX   r@  debugwarning)r   r  r  r   gelu
layer_norm
is_perfects    `     r   is_fully_optimizedzPhiOnnxModel.is_fully_optimized  sW    !!??AAN	4c 	4 	4 	4 	4 	4 	4 H[!!h+,,-h,--. h'(() 	 x((:"6"66*9M9MMX233hh?W6X6XX
!mZ)t*;Z*PYBY
??LL8999199LL7888>>NNHIIIr   )NFrT   )r!   r"   r#   r   r   rP   r   r	   boolr  r  r  r   r   s   @r   r  r  O  s        Rj RS Rs R R R R R R# # 7 #RV # # # # # #*  .       r   r  ),loggingr   typingr   r   numpyr   dynamo_onnx_helperr   fusion_baser   fusion_optionsr   r	   fusion_skiplayernormr
   r   fusion_utilsr   onnxr   r   r   r   r   
onnx_modelr   r!   r@  r   r&   r.   r1   r5   r<   r?   rB   rI   r   r8  rJ  rT  r\  r  r$   r   r   <module>r     s         ! ! ! ! ! ! ! !     / / / / / /       9 9 9 9 9 9 9 9 _ _ _ _ _ _ _ _ $ $ $ $ $ $ I I I I I I I I I I I I I I            	8		' ' ' ' ' ' ' '
: : : : : : : :
: : : : : : : :
: : : : : : : :
                            V V V V Vf V V Vrp, p, p, p, p,' p, p, p,f         W      D         W      B               BW  W  W  W  W  W  W  W tS S S S S9 S S S S Sr   