
     Ng                          d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ  e j        e          Z G d d	e          Z G d
 de          ZdS )    N)Union)AttentionMaskFusionAttention)NumpyHelper)	NodeProtohelper)	OnnxModel)BertOnnxModelc                   v     e Zd ZdZdedededef fdZdede	d	e	deded
ededede
e	df         fdZd Z xZS )FusionTnlrAttentionz
    Fuse TNLR Attention subgraph into one Attention node.
    TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
    modelhidden_size	num_headsattention_maskc                 P    t                                          ||||           d S N)super__init__)selfr   r   r   r   	__class__s        d/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr   zFusionTnlrAttention.__init__   s)     	YGGGGG    
mask_indexmatmuladdinputoutput
add_qk_strreturnNc	                    |dk    sJ |dk    r+||z  dk    r"t                               d| d|            d S | j                            |j        d                   }	| j                            |j        d                   p$| j                            |j        d                   }
|	|
d S t          j        |	          }t          j        |
          }| j                            d          }|	j        }t          j
        |          }t          j        |dz   ||d|z  g|                    |                                          d	          }	| j                            |	| j                   t          j        |d
z   |d|z  g|                    |                                          d	          }
| j                            |
| j                   ||dz   |d
z   g}||                    |           n|                    d           |*|                    d           |                    |           t          j        d||g|          }d|_        |j                            t          j        d|          g           |S )Nr   zinput hidden size z# is not a multiple of num of heads    	Attention_qkv_weight   T)name	data_typedimsvalsraw	_qkv_bias )inputsoutputsr%   zcom.microsoftr   )loggerdebugr   get_initializerr   r   to_arraycreate_node_namer&   r   tensor_dtype_to_np_dtypemake_tensorastypetobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attribute)r   r   r   r   r   r   r   r   r   weightbias
qkv_weightqkv_biasattention_node_nametensor_dtypenp_typeattention_inputsattention_nodes                     r   create_attention_nodez)FusionTnlrAttention.create_attention_node    s    1}}}}??i 7A==LLikii^giijjj4++FLO<<z))#)A,77c4:;U;UVYV_`aVb;c;c>T\4 )&11
'--"j99+FF'1,??#$}4"q;/""7++3355
 
 
 	
""64+?@@@!${2"k/"))1133
 
 
 	
""4)=>>> -/+-

 !##J////##B'''!##B'''##J///)#H$	
 
 
 !0 '')>{I)V)V(WXXXr   c                    |}|j         dk    rd S | j                            |g dg d          }|
|\  }}}}}	}
nd S g }t          |j                  D ]7\  }}||vr
||d         j        d         k    r"|                    |           8t          |          dk    rd S |d         }| j                            |
g dg d          }|d S |\  }}}}}| j                            |dgdg          }|d         }| j                            |
g d	g d
          }|d S |\  }}}| j                            |g dg d          }|d S |d         }|d         }| j                            |g dg d          }|d S |d         }|d         }| j                            |ddgddg          }|d S |j        d         |k    rd }|}|                     |||| j	        | j
        ||j        d         |d         j        d                   }|d S | j                            |           | j        | j        |j        <   t          j        dd|j        z   g|j        d         gd|j        z   g d          }| j                            || j                   |j        d         |j        d<   d|j        z   |j        d<   | j                            ||	|
g           | j                            |           | j                            |           | j                            |           | j                            |           d| _        d S d S )NSkipLayerNormalization)WhereAddMatMulReshape	TransposerM   )r!   r!   r!   r   r   r   r   r!   )rO   rN   SlicerL   rM   )r!   r   r   r   r!   rO   )SoftmaxrL   rM   )r   r   r   )MulrO   rN   rP   rL   rM   )r   r   r   r   r   r!   rN   rK   back_transpose_in_back_transpose_)r!   r      )permT)op_typer   match_parent_path	enumerater   r   r9   lenrH   r   r   nodes_to_addr8   node_name_to_graph_namer%   r   r:   add_nodenodes_to_remover=   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_node
start_node	qkv_nodes_matmul_belowreshape_qkvtranspose_qkv
matmul_qkvother_inputs_ir   
root_inputv_nodesr   r   upper_nodes	transposeqk_nodesadd_qk	matmul_qkq_nodesk_nodesrelative_position_bias_nodesr   attention_last_nodenew_nodeback_transposes                                 r   fusezFusionTnlrAttention.fuseh   s    $
!%===F J00HHH
 
	
  KTHQ<mZZF":#344 	' 	'IB///	!+A...&&&&|!!F!!_
*..>>>OO
 

 ?F!(Aq#vj226K=1#NNN	://
<X<X<XZcZcZcddF!)FI*..EEE
 

 ?Fbk*..>>>OO
 

 ?Fbk'+z'C'CFYX_L`cdfgbh'i'i$'/F<?j((J"- 11 #*1-,Q/5a8	 	H $$X...:>:ND(7 $-%56#$!HM1YY  N J0DEEE ) 2HN1!5!EHOA '')<mZ(XYYY ''111 ''000 ''000 ''000  $DS )(r   )__name__
__module____qualname____doc__r	   intr   r   strr   r   rH   r{   __classcell__r   s   @r   r   r      s        
HH H 	H
 &H H H H H HFF F 	F
 F F F F F 
y$	F F F FPq$ q$ q$ q$ q$ q$ q$r   r   c                   $     e Zd Z fdZd Z xZS )TnlrOnnxModelc                     t                                          |||           t          |           | _        t	          | | j        | j        | j                  | _        d S r   )r   r   r   r   r   r   r   attention_fusion)r   r   r   r   r   s       r   r   zTnlrOnnxModel.__init__   sT    	;777+D11 3D$:JDN\`\o p pr   c                 8    | j                                          d S r   )r   apply)r   s    r   fuse_attentionzTnlrOnnxModel.fuse_attention   s    ##%%%%%r   )r|   r}   r~   r   r   r   r   s   @r   r   r      sL        q q q q q
& & & & & & &r   r   )loggingtypingr   fusion_attentionr   r   fusion_utilsr   onnxr   r   
onnx_modelr	   onnx_model_bertr
   	getLoggerr|   r.   r   r    r   r   <module>r      s  
        ; ; ; ; ; ; ; ; $ $ $ $ $ $ " " " " " " " "             ) ) ) ) ) )		8	$	$H$ H$ H$ H$ H$/ H$ H$ H$V& & & & &M & & & & &r   