
     NgL                        d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;  ee<          Z= G d de;          Z>dS )    )	getLogger)ListOptional)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                       e Zd Zd,dededef fdZd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd Zd-dZd Zd Zd Zdedee         defdZdefdZd Zd.dZd Zd  Zd! Zd" Zd/d%e e!         d&efd'Z"d( Z#d0d)Z$d1d*efd+Z% xZ&S )2BertOnnxModelr   model	num_headshidden_sizec                    |dk    r|dk    s|dk    r	||z  dk    sJ t                                          |           || _        || _        t	          |           | _        t          | | j        | j        | j                  | _        t          | | j        | j        | j                  | _	        t          |           | _        dS )aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r(   r)   r   attention_maskr   attention_fusionr   qordered_attention_fusionr    utils)selfr'   r(   r)   	__class__s       d/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_bert.pyr,   zBertOnnxModel.__init__&   s     Q;!#3#3Q;YbKbfgKgKgKgKg"&+D11 /d6FX\Xk l l)@$"DND4G*
 *
& !&&


    c                 j    | j                                          | j                                         d S N)r.   applyr/   r1   s    r3   fuse_attentionzBertOnnxModel.fuse_attention;   s2    ##%%%&,,.....r4   c                    t          |           }|                                 t          |           }|                                 t          |           }|                                 t	          |           }|                                 d S r6   )r   r7   r   r   r   r1   fusions     r3   	fuse_geluzBertOnnxModel.fuse_gelu@   sn    D!!%% &&#D))r4   c                 N    t          | |          }|                                 d S r6   )r
   r7   )r1   is_fastgelur<   s      r3   fuse_bias_geluzBertOnnxModel.fuse_bias_geluK   s"    k22r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   gelu_approximationz BertOnnxModel.gelu_approximationO   s     (..r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_gemm_fast_geluz!BertOnnxModel.fuse_gemm_fast_geluS   s     #D))r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_add_bias_skip_layer_normz+BertOnnxModel.fuse_add_bias_skip_layer_normW   s     1$77r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_reshapezBertOnnxModel.fuse_reshape[   s     t$$r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   
fuse_shapezBertOnnxModel.fuse_shape_   s     T""r4   c                 N    t          | |          }|                                 d S r6   )r   r7   )r1   use_mask_indexr<   s      r3   fuse_embed_layerzBertOnnxModel.fuse_embed_layerc   s"    .t^DDr4   c                     t          |           }|                                 t          |           }|                                 t          |           }|                                 d S r6   )r   r7   r   r   r;   s     r3   fuse_layer_normzBertOnnxModel.fuse_layer_normg   sV    )$//+D11 2$77r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_simplified_layer_normz(BertOnnxModel.fuse_simplified_layer_normr   s     3D99r4   Tc                 P    t          | |          }|                                 d S )N)shape_infer)r   r7   )r1   rS   r<   s      r3   fuse_skip_layer_normz"BertOnnxModel.fuse_skip_layer_normv   s%    -dLLLr4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_skip_simplified_layer_normz-BertOnnxModel.fuse_skip_simplified_layer_normz   s     7==r4   c                    t          |           }|                                 t          t          d | j        j        j                            }t          t          d |                    }d}|t          | j        j
                  k     rh| j        j
        |         }d|j        v r)|j        |vr | j        j
                            |           n|dz  }|t          | j        j
                  k     fd S d S )Nc                 .    | j         dk    o
| j        dk    S )NRotaryEmbeddingcom.microsoft)op_typedomainnodes    r3   <lambda>z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s    T\->>a4;RaCa r4   c                     | j         S r6   )r\   r]   s    r3   r_   z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s    dk r4   r   rY      )r   r7   listfilterr'   graphr^   setmaplen	functionsnamer\   remove)r1   r<   rot_emb_nodesnon_ms_domains_to_keepifns         r3   fuse_rotary_embeddingsz$BertOnnxModel.fuse_rotary_embeddings~   s    '--aa
 % 
 
 "%S)A)A=%Q%Q!R!R#dj*++++%a(B BG++	AW0W0W
$++B////Q #dj*++++++++r4   c                 L    t          |           }|                                 d S r6   )r   r7   r;   s     r3   fuse_qordered_mamtulz"BertOnnxModel.fuse_qordered_mamtul   s     %d++r4   r[   input_indicescastedc                   
 g }|                                  }|                     |          }|D ]

fd|D             }|D ]}|                     |          r|s|                    |           /||v rU||         }	|	j        dk    rB|                     |	j        d                   "|r |                    |	j        d                    |S )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        c                 Z    g | ]'}|t          j                  k     j        |         (S  )rg   input).0rm   r^   s     r3   
<listcomp>zABertOnnxModel.get_graph_inputs_from_node_type.<locals>.<listcomp>   s1    WWWQ1s4:CVCV4:a=CVCVCVr4   Castr   )output_name_to_nodeget_nodes_by_op_typefind_graph_inputappendr[   rw   )r1   r[   rr   rs   graph_inputsr{   nodesbert_inputs
bert_inputparentr^   s             @r3   get_graph_inputs_from_node_typez-BertOnnxModel.get_graph_inputs_from_node_type   s   
 "6688))'22 
	A 
	ADWWWW-WWWK) A A
((44 A! 8$++J777#6660<F~//D4I4I&,WX/4Z4Z4f! A(//Q@@@A r4   c                 n    |                      dg d|          }||                      ddg|          z  }|S )NEmbedLayerNormalization)r   ra      	Attention   )r   )r1   rs   inputss      r3   !get_graph_inputs_from_fused_nodesz/BertOnnxModel.get_graph_inputs_from_fused_nodes   sE    556OQZQZQZ\bcc$66{QCPPPr4   c                    |                                  }d}d}|j        D ]>}|                     |t          j                  \  }}|r|dz  }|t          |          z  }?t                              d| d| d           dS )zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   ra   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rd   rw   change_graph_input_typer"   INT32rg   loggerinfo)r1   rd   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodess          r3   change_graph_inputs_to_int32z*BertOnnxModel.change_graph_inputs_to_int32   s    

 ; 	4 	4K&*&B&B;P[Pa&b&b#Hm $!#]!3!33 A  A  Aar  A  A  A	
 	
 	
 	
 	
r4   
batch_sizemax_seq_lenc                    |                      d          |                      d          z   }| j        j        j        D ]S}|j        |v rH|j        j        j        j        d         }||_	        |#|j        j        j        j        d         }||_	        T| j        j        j
        D ]%}|j        j        j        j        d         }||_	        &dS )zD
        Update input and output shape to use dynamic axes.
        T)rs   Fr   Nra   )r   r'   rd   rw   ri   typetensor_typeshapedim	dim_paramoutput)r1   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsrw   	dim_protor   s          r3   use_dynamic_axeszBertOnnxModel.use_dynamic_axes   s     !BB C 
 
22%2@@A Z%+ 	: 	:Ez...!J28<Q?	&7	#". %
 6 < @ CI*9I'j&- 	4 	4F/59!<I"3I	4 	4r4   c                 .    |                                   d S r6   )adjust_reshape_and_expandr8   s    r3   
preprocesszBertOnnxModel.preprocess   s    &&(((r4   c                 l   g }|                                  D ]X}|j        dk    rI|                     |j        d                   }|N|j        dk    rC|                    |g           |                     |j        d         |j        d                    |                     |g dg d| 	                                          }||d         }|                     |j        d                   }|d         }|                     |j        d                   }|d         }	|O|Mt          |          d	k    r:t          |          dk    r'|d         |d         k    r|	j        d         |j        d<   Z|rA|                     |           t                              d
t          |                      d S d S )NReshapera   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )r   r[   get_constant_valuerw   sizeextendreplace_input_of_all_nodesr   match_parent_pathr{   rg   remove_nodesr   r   )
r1   nodes_to_remover^   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_nodes
             r3   r   z'BertOnnxModel.adjust_reshape_and_expand   s   JJLL !	= !	=D|y(( !% 7 7
1 F F ,1Cq1H1H#**D622233DKNDJqMRRR  $55<<< LL,,..	     +".r"2K)-)@)@ARSTAU)V)V&,8,<)"&"9"9:O:UVW:X"Y"YK!-b!1J*6'3 233q88,,11.q1[^CC(2(9!(<
1 	Uo...KKSS=Q=QSSTTTTT	U 	Ur4   c                 "   |                                  }g }|                                 D ]}dddd}|j        |v r||j                 }|                     |g d|dddddg|          }|e|\  }}}	}
}}|j        d         |                                 j        d         j        k    r)|j        d         |j        d<   |                                  }|j        dk    r|                     |g dg d|          }||d	         j        d         |                                 j        d         j        k    rt          j	        d|j        dt          |j                  dz
           |j        |j        d
z             }d|_        |j                            t          j        d| j                  g           |                     ||                     |          j                   |                    |           |                     |           d S )Nra   r   r   )r   	ReduceSumr   )rz   ConstantOfShapeConcat	UnsqueezeGatherShaper   )r   rz   r   r   )r   r   r   r   r   _remove_mask)r   outputsri   rZ   r(   )r{   r   r[   r   rw   rd   ri   r   r#   	make_noderg   r\   	attributer   make_attributer(   add_nodeget_graph_by_noder~   r   )r1   r{   r   r^   op_input_idrm   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_nodes                 r3   clean_graphzBertOnnxModel.clean_graph   s@   "6688JJLL <	5 <	5D 78aVWXXK|{**-#55   1aA&'     + %'!{1~);A)>)CCC38<?-a0.2.F.F.H.H+|{**
  $55EEE LL'	     +#B'-a0DJJLL4Fq4I4NNN)/)9'#':a#dj//A2E.E#F$(K!%^!;	* * * 1@-&0779N{\`\j9k9k8lmmmnd6L6L^6\6\6abbb'..t444/*****r4   c                 V    |                                   |                                  d S r6   )r   prune_graphr8   s    r3   postprocesszBertOnnxModel.postprocessB  s,    r4   NFoptionsadd_dynamic_axesc                 :   ||j         s|                                  | j                                         | j                                         ||j        r(|                                  |                                  ||j        r| 	                                 | 
                                 |                                  ||j        r.|                     |j                    |                                  ||j        r|                                  |l| j                            |j                   |j        rFt+          | j        t.                    s,t1          | | j        | j        | j        |j                  | _        ||j        r|                                  ||j        r|                                  |                                  ||j         r*|j        tB          j"        k    }| #                    |           | j        $                                 | %                                 ||j&        r,| '                    d           | '                    d           ||j(        r| )                                 ||j*        r| +                                 ||j,        r| -                                 | .                                 |r| /                                 t`          1                    d| 2                                            d S )NT)r?   Fzopset version: )3enable_shape_inferencedisable_shape_inferencer0   remove_identity_nodesremove_useless_cast_nodesenable_layer_normrO   rQ   enable_gelur=   r   rH   enable_skip_layer_normrT   rV   enable_rotary_embeddingsro   r-   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer.   r	   r   r)   r(   enable_attentionr9   enable_qordered_matmulrq   rJ   enable_embed_layer_normr   MaskIndexEndrM   remove_useless_reshape_nodesr   enable_bias_gelur@   enable_bias_skip_layer_normrF   enable_gelu_approximationrB   enable_gemm_fast_gelurD   remove_unused_constantr   r   r   get_opset_version)r1   r   r   rL   s       r3   optimizezBertOnnxModel.optimizeF  s   )G((***
((*** 	
,,...O 9O  """++---O 3ONNO >O%%g&DEEE00222O @O'')))//0MNNN/ 
4CXZm8n8n (7$N'4) )% O 8O!!! O >O%%'''O ?O$:>Q>^^N!!.111 	
//111 O 8OD111E222O CO..0007#D##%%%7#@$$&&&##%%%  	$!!###@d&<&<&>&>@@AAAAAr4   c                     i }g d}g d}||z   D ])}|                      |          }t          |          ||<   *t                              d|            |S )z8
        Returns node count of fused operators.
        )r   r   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrY   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )r|   rg   r   r   )r1   op_countopsq_opsopr   s         r3   get_fused_operator_statisticsz+BertOnnxModel.get_fused_operator_statistics  s     
 
 

 
 
 + 	& 	&B--b11Eu::HRLL6H66777r4   c                    |                                  dt          ffd} |d          } |d           |d          z    |d          z   } |d           |d	          z    |d
          z   } |d           |d          z   } |d           |d          z   }|dk    o|dk    o||k    o|d|z  k    p|d|z  k    }|dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |S )zA
        Returns True when the model is fully optimized.
        Nop_namec                 2                         |           pdS )Nr   )get)r  fused_op_counts    r3   r  z2BertOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g..3!3r4   r   r   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r  strr   debugwarning)	r1   r
  r  embed	attentiongelu
layer_normsimple_layer_norm
is_perfects	    `       r3   is_fully_optimizedz BertOnnxModel.is_fully_optimized  s    !!??AAN	4c 	4 	4 	4 	4 	4 	4 233H[))HH5I,J,JJXXViMjMjj	x((:"6"66*9M9MMX233hh?W6X6XX
$H%CDDxxPrGsGss QY XQXd"X I-V3DI3U	 	 ??LL8999!!LL?@@@199LL7888A::LL<===>>NNHIIIr4   use_symbolic_shape_inferc                 N    t          |           }|                    |           d S r6   )r   convert)r1   r  packing_modes      r3   convert_to_packing_modez%BertOnnxModel.convert_to_packing_mode  s*    "4((566666r4   )r   r   )T)r   r   )NFr6   )F)'__name__
__module____qualname__r!   intr,   r9   r=   r@   rB   rD   rF   rH   rJ   rM   rO   rQ   rT   rV   ro   rq   r  r   boolr   r   r   r   r   r   r   r   r   r   r   r  r  r  __classcell__)r2   s   @r3   r&   r&   %   so       ' 'j 'S '3 ' ' ' ' ' '*/ / /
	 	 	              	 	 	         (  s 4PS9 ^b    ,    

 
 
4 4 4 4(  'U 'U 'UR@+ @+ @+D  OB OB 7 OBRV OB OB OB OBb  @& & & &P7 7 7 7 7 7 7 7 7 7r4   r&   N)?loggingr   typingr   r   r  r   fusion_attentionr   r   fusion_bart_attentionr	   fusion_biasgelur
   fusion_embedlayerr   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr    onnxr!   r"   r#   
onnx_modelr$   r  r   r&   rv   r4   r3   <module>r9     sX         ! ! ! ! ! ! ! ! / / / / / / ; ; ; ; ; ; ; ; 5 5 5 5 5 5 * * * * * * ; ; ; ; ; ; * * * * * * " " " " " " = = = = = = 2 2 2 2 2 2 Q Q Q Q Q Q Q Q = = = = = = = = = = = = = = 3 3 3 3 3 3 F F F F F F 7 7 7 7 7 7 , , , , , , ( ( ( ( ( ( : : : : : : $ $ $ $ $ $ r r r r r r r r _ _ _ _ _ _ _ _ $ $ $ $ $ $ 0 0 0 0 0 0 0 0 0 0            	8		|7 |7 |7 |7 |7I |7 |7 |7 |7 |7r4   