
     NgL0                     n    d dl mZ d dlmZ  G d d          Z G d de          Z G d d          Zd	S )
    )ArgumentParser)Enumc                       e Zd ZdZdZdZdZdS )AttentionMaskFormatr            N)__name__
__module____qualname__MaskIndexEndMaskIndexEndAndStartAttentionMaskNoMask     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_options.pyr   r   	   s)        L  M FFFr   r   c                   0    e Zd ZdZdZdZdZd Zd Zd Z	dS )	AttentionOpType	AttentionMultiHeadAttentionGroupQueryAttentionPagedAttentionc                     | j         S Nvalueselfs    r   __str__zAttentionOpType.__str__   s
    zr   c                 *    t          | j                  S r   )hashr   r   s    r   __hash__zAttentionOpType.__hash__!   s    DJr   c                 "    |j         | j         k    S r   r   )r   others     r   __eq__zAttentionOpType.__eq__$   s    {dj((r   N)
r
   r   r   r   r   r   r   r    r#   r&   r   r   r   r   r      sW        I-/%N       ) ) ) ) )r   r   c                   d    e Zd ZdZd ZddZd ZdefdZe	d             Z
e	d	efd
            ZdS )FusionOptionsz'Options of fusion in graph optimizationc                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        |dk    rd| _        t          j        | _        |dk    rt          j        | _        n|dk    rt          j        | _        d | _        |dv r3d| _        d| _        d| _        d| _        d| _        d| _        d| _        d S d S )NTFclipbertvitunetvaer*   )enable_geluenable_layer_normenable_attentionenable_rotary_embeddingsuse_multi_head_attention!disable_multi_head_attention_biasenable_skip_layer_normenable_embed_layer_normenable_bias_skip_layer_normenable_bias_geluenable_gelu_approximationenable_qordered_matmulenable_shape_inferenceenable_gemm_fast_gelugroup_norm_channels_lastr   r   attention_mask_formatr   r   attention_op_typeenable_nhwc_convenable_group_normenable_skip_group_normenable_bias_splitgeluenable_packed_qkvenable_packed_kvenable_bias_add)r   
model_types     r   __init__zFusionOptions.__init__+   s'   !% $(,% ).%16.&*#'+$+/( $).&&*#&*#%*"(,%+0D( &9%F")<)ID&&5  )<)CD&!% 000$(D!%)D"*.D')-D&%)D"$(D!#'D    10r   Tc                 R    |rt           j        | _        d S t           j        | _        d S r   )r   r   r?   r   )r   use_raw_masks     r   use_raw_attention_maskz$FusionOptions.use_raw_attention_maskZ   s-     	J)<)JD&&&)<)ID&&&r   c                 (    t           j        | _        d S r   )r   r   r?   r   s    r   disable_attention_maskz$FusionOptions.disable_attention_mask`   s    %8%?"""r   attn_op_typec                     || _         d S r   )r@   )r   rO   s     r   set_attention_op_typez#FusionOptions.set_attention_op_typec   s    !-r   c                    t          | j                  }| j        rd|_        | j        rd|_        | j        rd|_        | j        rd|_	        | j
        rd|_
        | j        rd|_        | j        rd|_        | j        rd|_        | j        rd|_        | j        rd|_        | j        rd|_        | j        rd|_        | j        r|                    d           | j        r|                    d           | j        r|                                 | j        dv rp| j        rd|_        | j        rd|_        | j        rd|_         | j!        rd|_"        | j#        rd|_$        | j%        rd|_&        | j'        rd|_(        | j)        rd|_*        |S )NFTr-   )+r(   rH   disable_gelur0   disable_layer_normr1   disable_rotary_embeddingsr3   disable_attentionr2   r4   disable_skip_layer_normr6   disable_embed_layer_normr7   disable_bias_skip_layer_normr8   disable_bias_gelur9   r:   disable_shape_inferencer<   r=   use_mask_indexrL   no_attention_maskrN   use_group_norm_channels_firstr>   disable_nhwc_convrA   disable_group_normrB   disable_skip_group_normrC   disable_bias_splitgelurD   disable_packed_qkvrE   disable_packed_kvrF   disable_bias_addrG   )argsoptionss     r   parsezFusionOptions.parsef   s   00 	("'G" 	.(-G%) 	5/4G,! 	-',G$( 	4/3G,' 	3-2G*( 	4.3G+, 	827G/! 	-',G$) 	504G-' 	3-2G*% 	1,0G) 	2**5111& 	1**4000! 	-**,,,?5551 9380% 1+0(& 2,1)+ 716.* 605-& 2,1)% 1+0($ 0*/'r   parserc                 L   |                      dddd           |                     d           |                      dddd           |                     d	           |                      d
ddd           |                     d           |                      dddd           |                     d           |                      dddd           |                     d           |                      dddd           |                     d           |                      dddd           |                     d           |                      dddd           |                     d           |                      dddd           |                     d           |                      dddd            |                     d!           |                      d"ddd#           |                     d$           |                      d%ddd&           |                     d'           |                      d(ddd)           |                     d*           |                      d+ddd,           |                     d-           |                      d.ddd/           |                     d0           |                      d1ddd2           |                     d3           |                      d4ddd5           |                     d6           |                      d7ddd8           |                     d9           |                      d:ddd;           |                     d<           |                      d=ddd>           |                     d?           |                      d@dddA           |                     dB           |                      dCdddD           |                     dE           |                      dFdddG           d S )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)rV   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)rW   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)rX   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)rY   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)rZ   z--disable_layer_normz!disable LayerNormalization fusion)rT   z--disable_geluzdisable Gelu fusion)rS   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)r:   z--disable_shape_inferencez disable symbolic shape inference)r[   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)r=   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)r\   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)rL   z--no_attention_maskz1no attention mask. Only works for model_type=bert)r]   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r4   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)r`   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)ra   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)rd   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)rc   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)re   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)rb   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)r_   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)r^   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaults)ri   s    r   add_argumentszFusionOptions.add_arguments   s   !+	 	 	
 	
 	
 	e444'8	 	 	
 	
 	
 	E:::(9	 	 	
 	
 	
 	U;;;,E	 	 	
 	
 	
 	???!<	 	 	
 	
 	
 	e444"4	 	 	
 	
 	
 	u555&	 	 	
 	
 	
 	///)>	 	 	
 	
 	
 	e<<<'3	 	 	
 	
 	
 	E:::%-	 	 	
 	
 	
 	%888j	 	 	
 	
 	
 	5111& e	 	 	
 	
 	
 	5999!D	 	 	
 	
 	
 	e444(d	 	 	
 	
 	
 	U;;;"L	 	 	
 	
 	
 	u555'c	 	 	
 	
 	
 	E:::!n	 	 	
 	
 	
 	e444"n	 	 	
 	
 	
 	u555 C	 	 	
 	
 	
 	U333&I	 	 	
 	
 	
 	5999!M	 	 	
 	
 	
 	e444- B	 	 	
 	
 	
 	%@@@)H	 	 	
 	
 	
 	
 	
r   N)T)r
   r   r   __doc__rI   rL   rN   r   rQ   staticmethodrh   r   rq   r   r   r   r(   r(   (   s        11-( -( -(^J J J J@ @ @./ . . . . 3 3 \3j w
n w
 w
 w
 \w
 w
 w
r   r(   N)argparser   enumr   r   r   r(   r   r   r   <module>rv      s   
 $ # # # # #             ) ) ) ) )d ) ) )"l
 l
 l
 l
 l
 l
 l
 l
 l
 l
r   