
     Ngm                         d dl Z d dlmZmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ  e j        e          Z G d
 de          Z G d de
          Z G d de          ZdS )    N)OptionalUnion)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                        e Zd ZdZdedededef fdZ	 dded	e	d
e	de	dededededede
e         dee	df         fdZdededededededededededededee	df         fdZd Zd Zd Z xZS )FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc           	      h    t                                          ||||dddg           d| _        d S )NF SkipSimplifiedLayerNormalizationAdd)use_multi_head_attentionsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__s        b/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_t5.pyr   zFusionT5Attention.__init__   sJ     	%*?G 	 	
 	
 	
     N
mask_indexq_matmulk_matmulv_matmulinputoutput
add_qk_strscalereturnc                    |dk    sJ |dk    r+||z  dk    r"t                               d| d|            dS | j                            |j        d                   }| j                            |j        d                   }| j                            |j        d                   }|t          |j        d          d           dS t          j        |          }t          j        |          }t          j        |          }|j        |j        k    sJ |j        d         }|j        d         }|j        d         }||cxk    r|k    sn J |dk    r'||k    r!t           	                    d| d| d	           t          j        |j        dd                   }t          j        |||fd
          }d|z  }| j                            d          }t          j        |dz   t           j        ||g|                                d          }| j                            || j                   ||dz   dg}||                    |           n|                    d           |	*|                    d           |                    |	           t          j        d||g|          }d|_        |j                            t          j        d|          g           |
.|j                            t          j        d|
          g           | j        @|j                            t          j        dt9          | j                            g           |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   	Attention_qkv_weightTname	data_typedimsvalsraw inputsoutputsr4   com.microsoftr   r*   mask_filter_value)loggerdebugr   get_initializerr'   printr
   to_arrayshapewarningnpprodstackcreate_node_namer   make_tensorr   FLOATtobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attributer>   float)r   r#   r$   r%   r&   r   r   r'   r(   r)   r*   q_weightk_weightv_weightqwkwvw
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameweightattention_inputsattention_nodes                              r!   create_attention_nodez'FusionT5Attention.create_attention_node*   s   4 1}}}}??i 7A==LLikii^giijjj4:--hnQ.?@@:--hnQ.?@@:--hnQ.?@@>!$ g g g   4!(++!(++!(++ x28####Xa[
Xa[
Xa[
Z5555:555555??{j88NNJk J Jfp J J J  
 gbhqrrl++Xr2rl333
["j99+FF#$}4!'n-##%%
 
 
 	
""64+?@@@ -/

 !##J////##B'''!##B'''##J///)#H$	
 
 
 !0 '')>{I)V)V(WXXX$++V-B7E-R-R,STTT!-$++V-BCVX]^b^tXuXu-v-v,wxxxr"   querykeyvalueres_pos_biaspast_key
past_valuepresent_keypresent_valuec                    |dk    sJ |dk    r+||z  dk    r"t                               d| d|            d S | j                            d          }||dn||dn|dg}||                    |           n|                    d           ||                    |           n|                    d           |.|J |                    |           |                    |           |g}|	.|
J |                    |	           |                    |
           t          j        d|||          }d|_        |j        	                    t          j
        d|          g           |j        	                    t          j
        d	d
          g           | j        @|j        	                    t          j
        dt          | j                            g           |                     d           |S )Nr   r-   r.   MultiHeadAttentionr9   r:   r=   r   r*         ?r>   )r?   r@   r   rI   rO   r   rP   rQ   rR   rS   rT   r>   rU   increase_counter)r   rg   rh   ri   r#   rj   rk   rl   r(   rm   rn   r   r   rb   rd   attention_outputsre   s                    r!   create_mha_nodez!FusionT5Attention.create_mha_node   sA    1}}}}??i 7A==LLikii^giijjj4"j99:NOO+BB3-BBU	
 !##J////##B'''###L1111##B''')))##H---##J///#H" ,,,$$[111$$]333) #%$	
 
 
 !0 '')>{I)V)V(WXXX '')>w)L)L(MNNN!-$++V-BCVX]^b^tXuXu-v-v,wxxx2333r"   c                 b    |                      |||           |                     |||           d S N)fuse_t5_encoderfuse_t5_decoder)r   normalize_nodeinput_name_to_nodesoutput_name_to_nodes       r!   fusezFusionT5Attention.fuse   s<    ^-@BUVVV^-@BUVVVVVr"   c                 d   |j         dk    r|j         dk    rd S | j                            |g dg d          }|d S |\  }}}}| j                            |g dg d          }	|	d S |	d         }
| j                            |g dg d          }|d S |\  }}}| j                            |g d	g d
          }|d S |\  }}}d }| j                            |g dg d          }|d S |d         }|d         j         dk    rd S | j                            |          \  }}|dk    r|| _        | j                            |d         j        d                   }d }| j                            |ddgddg          }|d S |d         }|j        d         }| j                            |g dg d          }|d S |\  }}}| j                            |g dg d
          }|d S |\  }}}|j        d         |
j        d         k    rd S |                     |          \  }} | 	                    |||||| |
j        d         |j
        d         |d
  
        }!|!d S | j                            |!           | j        | j        |!j        <   | j                            |dd                     | j                            |           | j                            |d d                    |"| j                            |d d                    | j                            |d d                    d| _        d S )Nr   r   MatMulReshape	Transposer   r   r   r   r   Concat	UnsqueezeGatherShaper   r   r   r   r   r   Softmaxr   r   r   r   r   r   MulSubCastr   r   r   r   r   r   r   r   r   r   r   RelativePositionBiasrq   T)op_typer   match_parent_pathget_constant_inputr>   r   process_maskr'   get_num_heads_and_hidden_sizerf   r(   nodes_to_addrO   rN   node_name_to_graph_namer4   nodes_to_removerS   prune_graph)"r   ry   rz   r{   	qkv_nodes_reshape_qkvtranspose_qkv
matmul_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qkr#   
mask_nodesmul_nodemul_valrj   	rpb_nodesrpb_add_nodek_nodes	reshape_kmatmul_kq_nodestranspose_q	reshape_qmatmul_qq_num_headsq_hidden_sizenew_nodes"                                     r!   rw   z!FusionT5Attention.fuse_t5_encoder   s   !%GGGNLbfkLkLkFJ00888LL
 
	
 F4=1;z*66666LL
 

 "F*2.*.....II
 

 ?F!(9h ://(((II
 

 F'69
Z11CCC
 


 Fa=a= E))FZ228<<
7f%,D"(55jn6J16MNN
J00*+F
 
	
 F |#)!,*.....II
 

 ?F!(9h *.....II
 

 ?F+2(Y >! 0 6q 999F%)%G%G	%R%R"]--"1%q!
 
 F  ***6:6J$X]3##IabbM222##H---##GCRCL111 ''555##GCRCL111r"   c                    |j         dk    r|j         dk    rd S | j                            |g dg d          }|d S |\  }}}}| j                            |g dg d          }	|	d S |	d         }
d }d }d }| j                            |g dg d          }|| j                            |g d	g d
          }|R|\  }}}|j        d         }|j        d         }d|vrd S |j        d         |
j        d         k    rd| _        npd| _        nh|j        d         }||v rd S d|vrd S d| _        nG|\  }}}}|j        d         }||v rd S d|vrd S |j        d         }d|vrd S |j        d         }d| _        | j                            |g dg d          }|d S |\  }}}d }d }| j        dk    r| j                            |g dg d          }|d S |d         }|d         j         dk    rd S | j                            |          \  }}|dk    r|| _        | j        	                    |d         j        d                   }na| j                            |ddgddg          }||j        d         }n1| j                            |ddgddg          }|d S |j        d         }d }d }d }| j        dk    r| j                            |g d	g d
          }|g|\  } }!}|!j        d         }||!j        d                  }"|"D ]2}#| j        
                    |#j        d                   }$|$	|$j        } n3|d S d|vrd S n| j                            |dgdg          }|d S |d         } | j        d         }||v rd S d|vrd S n| j                            |g dg dfg dg dfg|          \  }%}}d }&d }"||d         |d          }!}'|!j        d         }|%dk    r!||'j        d                  }&|&j        d         }n|'j        d         }||v rd S d!|vrd S |%dk    rI||'j        d                  }"|"D ]2}#| j        
                    |#j        d                   }$|$	|$j        } n3n|'j        d         }|d S d"|vrd S n| j                            |g d	g d
          }|d S |\  }}!}|!j        d         }||!j        d                  }"|"D ]2}#| j        
                    |#j        d                   }$|$	|$j        } n3|d S d"|vrd S | j                            |g d	g d          }(|(d S |(\  })}*}+|+j        d         |
j        d         k    rd S |                     |*          \  },}-| j        dk    r
||}|}d }d }|                     |+j        d         |||||||j        d         |||,|-          }.|.d S | j                            |.           | j        | j        |.j        <   | j                            |dd                     | j                            |           | j                            |d d                    |"| j                            |d d                    | j                            |(d d                    d#| _        d S )$Nr   r   r~   r   r   r   )r   r   r   r   )r   r   r   r   r   r   r   rn   r   past_value_crosspast_value_selfpresent_value_selfr   r   r   r   r   r   Slicer   present_key_crossr   past_key_cross)r   r   r   r   )r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   past_key_selfpresent_key_selfT)r   r   r   r'   r(   r   r   r>   r   r   find_graph_outputr4   match_parent_pathsr   rt   r   rO   rN   r   r   rS   r   )/r   ry   rz   r{   r   r   r   r   r   r   r   ri   rl   rn   r   transpose_vr   r   concat_vr   r   r   r#   rj   r   r   r   r   rh   rk   rm   r   transpose_kr   present_key_transpose_nodespresent_key_transpose_nodepresent_key_candidateidxpast_key_transpose_nodeconcat_kr   r   r   r   r   r   r   s/                                                  r!   rx   z!FusionT5Attention.fuse_t5_decoderX  s   !%GGGNLbfkLkLkFJ00888LL
 
	
 F4=1;z*66666LL
 

 "F*2.
*..888LL
 

 ?j22222		 G
 "3:0Y!* + 21 5"-77F>!$(8(>q(AAA%&DNN%&DNN'-a0
!444F%Z77F!"(/%HaA!*J000 
22$OA.M#=88OA&EDN://(((II
 

 F'69
>Q55GGG""" J
 !!!}H!}$--66x@@JAw&  )0&,99*R.:Nq:QRRJJ
44 A I
 $%|A J8823F 	
 $F%|A>Qj22222		 G
 ",3)Yoa(.A)BRSTBU.V+2M  .,0J,H,HIcIjklIm,n,n),8&;&@ 9 &F&k99F : *66 MC 
 ?F%aj&,Q/222F#833F 4 #j;;AAA<<<PNNNP_P_P_` $ OC! '+#*.'"&-aj'"+)oa(!88.A(.QRBS.T+6<Q?HH'~a0H222F"(22F!882EhoVWFX2Y/6Q " "204
0L0LMgMnopMq0r0r-0<*?*DK!E = #+/!"4K&F%[88F 9 *66666II 
 ?F")9aoa(.A)BRSTBU.V+2M  .,0J,H,HIcIjklIm,n,n),8&;&@ 9 &F%[88F*.....II
 

 ?F+2(Y>! 0 6q 999F%)%G%G	%R%R"]>Q8#7CEHJ''OAq!
 
 F  ***6:6J$X]3##IabbM222##H---##GCRCL111 ''555##GCRCL111r"   rv   )__name__
__module____qualname____doc__r   intr   r   strr   r   rU   r   rf   rt   r|   rw   rx   __classcell__r    s   @r!   r   r      s          	
 &     8 "&k kk k 	k
 k k k k k k k 
y$	k k k kZ?? ? 	?
 ? ? ? ? ? ? ? ? ? 
y$	? ? ? ?BW W Wz  z  z xA  A  A  A  A  A  A r"   r   c                   .     e Zd Zdedef fdZd Z xZS )FusionRelativePositionBiasBlockr   max_distancec                 n    t                                          |dddg           || _        d| _        d S )Nr   r   r   F)r   r   r   is_bidirectional)r   r   r   r    s      r!   r   z(FusionRelativePositionBiasBlock.__init__]  s;     68HIII( %r"   c           	      x   |j         dk    r|j         dk    rd S | j                            |g dg d          }|$| j                            |g dg d          }|d S |d         }|d         }|d	         }| j                            |g d
g d          }|d S |d         }	| j                            |	g dg d          }
|
+| j                            |	g dg d          }
d| _        |
d S |
d         }| j                            |           | j                            |           | j                            |
           | j        rdnd}| j                            |j        d	                   }t          j	        |          }t          j        |          }t          j        | j                            d|          t          j        t          j        |          d	         t          j        |          d         g|                                d          }| j                            || j                   |j        |j        d         |j        d         g}|j        d	         g}t          j        d||| j                            d|                    }d|_        |j                            t          j        d| j                  g           |j                            t          j        d| j                  g           | j                            |           | j        | j        |j        <   d S )Nr   r   )r   r   r   Where)r   r   r   r   )r   r   r   r   r   )r   r   r   r   r      r   r   )	MinConstantOfShaper   r   r   r   DivLogr   )	r   r   r   r   r   r   r   r   r   )r   Negr   r   r   r   r   Range)r   r   r   r   r   r   r   r   )r   Absr   r   r   )r   r   r   r   r   Tencoderdecoderbias_table_weight)name_prefixr   r3   r   r:   r=   r   r   )r   r   r   r   r   rS   rA   r'   r
   rC   rF   	transposer   rJ   rI   r   rK   rD   rL   rM   rN   r4   r(   rP   rQ   rR   rT   r   r   rO   r   )r   noderz   r{   compute_bias_nodesgatherwhere	unsqueezecompute_buckets_nodesdivrange_nodes
range_nodenode_name_prefixtable_weight_itable_weighttable_weight_t
bias_tabler;   r<   rpb_nodes                       r!   r|   z$FusionRelativePositionBiasBlock.fusec  s|    <5  T\W%<%<F!Z99???
 
 %!%!=!=JJJOOO" " ")#A&"2&&q)	 $
 < <ZZZ'''!
 !

 !(F#B'j22[[[$$$
 

 *66AAA??? K %)D!" _
##$6777##$9:::##K000(,(=L99933FLODD"+N;;l33',,-@N^,__!'(<((+RXl-C-CA-FG''))
 
 

 	
"":t/CDDD/:#3A#6
8H8KL#A&'#",,-CQa,bb	
 
 
 *!!6#8IZ#[#["\]]]!!6#89KTMb#c#c"deee  ***6:6J$X]333r"   )r   r   r   r   r   r   r|   r   r   s   @r!   r   r   \  sh        &i &s & & & & & &LK LK LK LK LK LK LKr"   r   c                   H     e Zd Z fdZd Zd Zd Zd Zd Zd Z	d Z
 xZS )	T5OnnxModelc                 <   t                                          |||           t          |           | _        t	          | | j        | j        | j                  | _        t          |           | _	        t          |           | _        t          | d          | _        d S )N   )r   r   r   r   r   r   r   attention_fusionr   layer_norm_fusionr	   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r    s       r!   r   zT5OnnxModel.__init__  s    	;777+D11 1$8H$.Z^Zm n n!CD!I!I&LT&R&R# :$DDr"   c                 8    | j                                          d S rv   )r   applyr   s    r!   fuse_attentionzT5OnnxModel.fuse_attention  s    ##%%%%%r"   c                 8    | j                                          d S rv   )r   r  r  s    r!   fuse_layer_normzT5OnnxModel.fuse_layer_norm  s    $$&&&&&r"   c                 8    | j                                          d S rv   )r   r  r  s    r!   fuse_skip_layer_normz T5OnnxModel.fuse_skip_layer_norm  s    #))+++++r"   c                 z   g }|                                  D ]}|j        dk    r|                     |g dg d          }|+|                     |dgdg          }|G|d         }|j        d         |j        d<   |                    |           |                    |           |                     |           d S )Nr   )r   r   r   r   r   LessOrEqualTiler   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   nodesr   r   r(   rS   rO   remove_nodesr   r   r   extended_mask_nodesr   r   s         r!   !remove_extended_mask_decoder_initz-T5OnnxModel.remove_extended_mask_decoder_init  s    JJLL 	3 	3D|u$$&*&<&<   655' '#" '. 224:P9QTUSVWW	$$Q<%)[^"&&':;;;&&t,,,!!/222?	3 	3r"   c                 ~   g }|                                  D ]}|j        dk    r|                     |g dg d          }|+|                     |ddgddg          }|I|d         }|j        d         |j        d<   |                    |           |                    |           |                     |           d S )Nr   )r   r   r   r   r   r   r  r  r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  s         r!   remove_extended_mask_decoderz(T5OnnxModel.remove_extended_mask_decoder  s    JJLL  	3  	3D|u$$&*&<&<   988!' '#$ '. 224'CY9Z]^`a\bcc	$$Q<%)[^"&&':;;;&&t,,,!!/222A 	3  	3r"   c                 `    |                                   | j                                         d S rv   )adjust_reshape_and_expandr  r  r  s    r!   
preprocesszT5OnnxModel.preprocess  s.    &&(((r"   c                 ~    |                                   |                                  |                                  d S rv   )r  r  r   r  s    r!   postprocesszT5OnnxModel.postprocess  s>    ..000))+++r"   )r   r   r   r   r  r  r	  r  r  r  r  r   r   s   @r!   r   r     s        E E E E E& & &' ' ', , ,!3 !3 !3F"3 "3 "3H           r"   r   )loggingtypingr   r   numpyrF   fusion_attentionr   r   fusion_baser   fusion_simplified_layernormr   r	   fusion_utilsr
   onnxr   r   r   
onnx_modelr   onnx_model_bertr   	getLoggerr   r?   r   r   r    r"   r!   <module>r&     s  
  " " " " " " " "     ; ; ; ; ; ; ; ;       r r r r r r r r $ $ $ $ $ $ / / / / / / / / / /             ) ) ) ) ) )		8	$	$E	  E	  E	  E	  E	  E	  E	  E	 PSK SK SK SK SKf SK SK SKle e e e e- e e e e er"   