from logging import getLogger
from typing import Tuple, Union

import numpy as np
from fusion_base import Fusion
from onnx import NodeProto, TensorProto, helper, numpy_helper
from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionAttentionVae(Fusion):
    """
    Fuse Attention subgraph of Vae Decoder into one Attention node.
    """
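
    # How fuse() locates the pattern (a description of the code below): starting
    # from a Softmax node, it finds the child MatMul (probabilities x V) and the
    # trailing Reshape -> Transpose -> Reshape output chain, then walks backwards
    # with match_parent_path through the Q, K and V projection branches
    # (MatMul -> Add -> Reshape -> Transpose -> Reshape, with an extra Transpose
    # on the K branch) and the QK branch (MatMul -> Mul -> Add -> Softmax). The
    # matched region is replaced by a single com.microsoft Attention node with
    # packed QKV weights.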
    modelhidden_size	num_headsc                     t                                          |ddg           || _        || _        d| _        d| _        d S )N	AttentionSoftmaxT)super__init__r   r   num_heads_warninghidden_size_warning)selfr   r   r   	__class__s       i/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_attention_vae.pyr   zFusionAttentionVae.__init__   sH    i[999&" "&#'       	reshape_qadd_qreturnc                 \   | j                             |d          }|t          |j                  dk    r| j        | j        fS | j                             |j        d                   }|%t          |t          j	                  r|j
        dk    s| j        | j        fS t          |          }|dk    r| j        | j        fS | j                             |          \  }}|%t          |t          j	                  r|j        dk    r| j        | j        fS |j        d         }| j        dk    r:|| j        k    r/| j        r(t                               d|| j                   d| _        | j        dk    r:|| j        k    r/| j        r(t                               d|| j                   d| _        ||fS )	zDetect num_heads and hidden_size from a reshape node.

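
    # Worked example (hypothetical values): in a VAE decoder attention block with
    # 512 channels and a single head, the Q Reshape takes its target shape from a
    # Concat whose third input is the constant 1 (num_heads), and the Q bias has
    # shape [512], so get_num_heads_and_hidden_size() below returns (1, 512).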

    def get_num_heads_and_hidden_size(self, reshape_q: NodeProto, add_q: NodeProto) -> Tuple[int, int]:
        """Detect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
            add_q (NodeProto): add node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        concat = self.model.get_parent(reshape_q, 1)
        if concat is None or len(concat.input) != 4:
            return self.num_heads, self.hidden_size  # Fall back to the user-specified values.

        # The target shape is a Concat of [?, ?, num_heads, head_size]; read num_heads from it.
        value = self.model.get_constant_value(concat.input[2])
        if value is None or not isinstance(value, np.ndarray) or value.size != 1:
            return self.num_heads, self.hidden_size

        num_heads = int(value)
        if num_heads <= 0:
            return self.num_heads, self.hidden_size

        # The bias of the Q projection has shape [hidden_size]; read hidden_size from it.
        _, bias = self.model.get_constant_input(add_q)
        if bias is None or not isinstance(bias, np.ndarray) or bias.ndim != 1:
            return self.num_heads, self.hidden_size

        hidden_size = bias.shape[0]

        if self.num_heads > 0 and num_heads != self.num_heads:
            if self.num_heads_warning:
                logger.warning(
                    "Detected number of attention heads is %d. Ignore --num_heads %d", num_heads, self.num_heads
                )
                self.num_heads_warning = False  # Do not show the warning more than once.

        if self.hidden_size > 0 and hidden_size != self.hidden_size:
            if self.hidden_size_warning:
                logger.warning(
                    "Detected hidden size is %d. Ignore --hidden_size %d", hidden_size, self.hidden_size
                )
                self.hidden_size_warning = False  # Do not show the warning more than once.

        return num_heads, hidden_size
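
    # Packing arithmetic used by create_attention_node() below, assuming 2-D
    # projection weights of shape [512, 512]: np.stack((q, k, v), axis=1) yields
    # a [512, 3, 512] array stored as a [512, 1536] initializer
    # (qkv_weight_dim = 3 * 512), and the three [512] biases stack into a
    # [3, 512] array stored with dims [1536].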

    def create_attention_node(
        self,
        q_matmul: NodeProto,
        q_add: NodeProto,
        k_matmul: NodeProto,
        k_add: NodeProto,
        v_matmul: NodeProto,
        v_add: NodeProto,
        num_heads: int,
        hidden_size: int,
        input_name: str,
        output_name: str,
    ) -> Union[NodeProto, None]:
        """Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input_name (str): input name
            output_name (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        """
        if q_matmul.input[0] != input_name or k_matmul.input[0] != input_name or v_matmul.input[0] != input_name:
            logger.debug(
                "For self attention, input hidden state for q and k/v shall be same. Got %s, %s, %s",
                q_matmul.input[0],
                k_matmul.input[0],
                v_matmul.input[0],
            )
            return None

        if hidden_size > 0 and (hidden_size % num_heads) != 0:
            logger.debug("input hidden size %d is not a multiple of num of heads %d", hidden_size, num_heads)
            return None

        q_weight_tensor = self.model.get_initializer(q_matmul.input[1])
        k_weight_tensor = self.model.get_initializer(k_matmul.input[1])
        v_weight_tensor = self.model.get_initializer(v_matmul.input[1])
        if not (q_weight_tensor and k_weight_tensor and v_weight_tensor):
            return None

        # The bias initializer can be on either input of the Add node.
        q_bias_tensor = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0])
        k_bias_tensor = self.model.get_initializer(k_add.input[1]) or self.model.get_initializer(k_add.input[0])
        v_bias_tensor = self.model.get_initializer(v_add.input[1]) or self.model.get_initializer(v_add.input[0])
        if not (q_bias_tensor and k_bias_tensor and v_bias_tensor):
            return None

        q_bias = numpy_helper.to_array(q_bias_tensor)
        k_bias = numpy_helper.to_array(k_bias_tensor)
        v_bias = numpy_helper.to_array(v_bias_tensor)

        q_bias_shape = np.prod(q_bias.shape)
        k_bias_shape = np.prod(k_bias.shape)
        v_bias_shape = np.prod(v_bias.shape)

        if q_weight_tensor.data_type == 10:  # TensorProto.FLOAT16
            logger.debug("weights are in fp16. Please run fp16 conversion after optimization")
            return None

        q_weight = numpy_helper.to_array(q_weight_tensor)
        k_weight = numpy_helper.to_array(k_weight_tensor)
        v_weight = numpy_helper.to_array(v_weight_tensor)

        if q_weight.shape != k_weight.shape or q_weight.shape != v_weight.shape:
            return None

        qw_in_size = q_weight.shape[0]
        kw_in_size = k_weight.shape[0]
        vw_in_size = v_weight.shape[0]

        assert qw_in_size == kw_in_size and kw_in_size == vw_in_size

        if hidden_size > 0 and hidden_size != qw_in_size:
            raise ValueError(
                f"Input hidden size ({hidden_size}) is not same as weight dimension of q,k,v ({qw_in_size}). "
                "Please provide a correct input hidden size or pass in 0"
            )

        # For 2-D weights the shape is [in_size, out_size].
        qw_out_size = np.prod(q_weight.shape[1:])

        # Pack Q, K and V weights into a single [in_size, 3 * out_size] initializer.
        qkv_weight = np.stack((q_weight, k_weight, v_weight), axis=1)
        qkv_weight_dim = 3 * int(qw_out_size)

        attention_node_name = self.model.create_node_name("Attention")

        assert q_bias_shape == k_bias_shape == qw_out_size
        assert v_bias_shape == qw_out_size

        # Pack Q, K and V biases into a single initializer of total size 3 * hidden_size.
        qkv_bias = np.stack((q_bias, k_bias, v_bias), axis=0)
        qkv_bias_dim = 3 * int(q_bias_shape)

        self.add_initializer(
            name=attention_node_name + "_qkv_weight",
            data_type=TensorProto.FLOAT,
            dims=[qw_in_size, qkv_weight_dim],
            vals=qkv_weight,
        )

        self.add_initializer(
            name=attention_node_name + "_qkv_bias",
            data_type=TensorProto.FLOAT,
            dims=[qkv_bias_dim],
            vals=qkv_bias,
        )

        attention_inputs = [
            input_name,
            attention_node_name + "_qkv_weight",
            attention_node_name + "_qkv_bias",
        ]

        attention_node = helper.make_node(
            "Attention",
            inputs=attention_inputs,
            outputs=[output_name],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        self.increase_counter("Attention (self attention)")
        return attention_node

    def fuse(self, softmax_node, input_name_to_nodes, output_name_to_node):
        matmul_qkv = self.model.find_first_child_by_type(softmax_node, "MatMul", input_name_to_nodes, recursive=False)
        if matmul_qkv is None:
            return

        # Validate the output chain: Reshape -> Transpose -> Reshape -> MatMul -> Add -> Transpose.
        reshape_qkv = self.model.find_first_child_by_type(matmul_qkv, "Reshape", input_name_to_nodes, recursive=False)
        if reshape_qkv is None:
            return

        transpose_qkv = self.model.find_first_child_by_type(
            reshape_qkv, "Transpose", input_name_to_nodes, recursive=False
        )
        if transpose_qkv is None:
            return

        reshape_out = self.model.find_first_child_by_type(
            transpose_qkv, "Reshape", input_name_to_nodes, recursive=False
        )
        if reshape_out is None:
            return

        matmul_out = self.model.find_first_child_by_type(reshape_out, "MatMul", input_name_to_nodes, recursive=False)
        if matmul_out is None:
            return

        add_out = self.model.find_first_child_by_type(matmul_out, "Add", input_name_to_nodes, recursive=False)
        if add_out is None:
            return

        transpose_out = self.model.find_first_child_by_type(add_out, "Transpose", input_name_to_nodes, recursive=False)
        if transpose_out is None:
            return

        v_nodes = self.model.match_parent_path(
            matmul_qkv, ["Reshape", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, None]
        )
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return
        (_, _, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Add", "Mul", "MatMul"], [0, 0, None, 0])
        if qk_nodes is not None:
            (_softmax_qk, _add_zero, _mul_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("fuse_attention: failed to match qk path")
            return

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Reshape", "Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return
        (_, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Transpose", "Reshape", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return
        (_, _, _, _, add_k, matmul_k) = k_nodes

        attention_last_node = reshape_out

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q, add_q)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return

        # The number of heads is the same for the Q, K and V paths, so the value detected from Q is used.
        new_node = self.create_attention_node(
            matmul_q,
            add_q,
            matmul_k,
            add_k,
            matmul_v,
            add_v,
            q_num_heads,
            q_hidden_size,
            matmul_q.input[0],
            attention_last_node.output[0],
        )
        if new_node is None:
            return

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv])

        # Use prune_graph to remove the remaining matched nodes, since they may be shared by other nodes.
        self.prune_graph = True