
from logging import getLogger
from typing import Tuple, Union

import numpy as np
from fusion_base import Fusion
from fusion_utils import NumpyHelper
from onnx import NodeProto, helper, numpy_helper
from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionMultiHeadAttentionSam2(Fusion):
    """
    Fuse MultiHeadAttention subgraph of Segment Anything v2 (SAM2).
    """

    def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int):
        super().__init__(model, "MultiHeadAttention", ["LayerNormalization"])
        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # Flags to show each warning only once.
        self.num_heads_warning = True
        self.hidden_size_warning = True

    def get_decoder_num_heads(self, reshape_q: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        # The shape input of the reshape node is a constant like [0, 0, num_heads, head_size].
        shape_value = self.model.get_constant_value(reshape_q.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [4]:
                num_heads = int(shape_value[2])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0
    def get_encoder_num_heads(self, reshape_in: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_in (NodeProto): reshape node before the Split of packed QKV
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        # The packed QKV is reshaped to 5D like [batch, seq_len, 3, num_heads, head_size].
        # The shape is either a constant tensor, or built by a Concat of constants.
        shape_value = self.model.get_constant_value(reshape_in.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [5]:
                num_heads = int(shape_value[3])
        else:
            concat_shape = self.model.match_parent(reshape_in, "Concat", 1)
            if concat_shape is not None and len(concat_shape.input) == 5:
                shape_value = self.model.get_constant_value(concat_shape.input[3])
                if shape_value is not None:
                    if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [1]:
                        num_heads = int(shape_value[0])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0

    def get_hidden_size(self, layernorm_node: NodeProto) -> int:
        """Detect hidden_size from LayerNormalization node.

        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        """
        layernorm_bias = self.model.get_initializer(layernorm_node.input[2])
        if layernorm_bias:
            return NumpyHelper.to_array(layernorm_bias).shape[0]

        return 0

    def get_num_heads_and_hidden_size(
        self, reshape_q: NodeProto, layernorm_node: NodeProto, is_encoder: bool = False
    ) -> Tuple[int, int]:
        """Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        if is_encoder:
            num_heads = self.get_encoder_num_heads(reshape_q)
        else:
            num_heads = self.get_decoder_num_heads(reshape_q)
        if num_heads <= 0:
            num_heads = self.num_heads  # Fall back to the user specified value.

        if self.num_heads > 0 and num_heads != self.num_heads and self.num_heads_warning:
            logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.")
            self.num_heads_warning = False  # Do not show the warning more than once.

        hidden_size = self.get_hidden_size(layernorm_node)
        if hidden_size <= 0:
            hidden_size = self.hidden_size  # Fall back to the user specified value.

        if self.hidden_size > 0 and hidden_size != self.hidden_size and self.hidden_size_warning:
            logger.warning(
                f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value."
            )
            self.hidden_size_warning = False  # Do not show the warning more than once.

        return num_heads, hidden_size

    def create_attention_node(
        self,
        q_matmul: NodeProto,
        q_add: NodeProto,
        k_matmul: NodeProto,
        k_add: NodeProto,
        v_matmul: NodeProto,
        v_add: NodeProto,
        num_heads: int,
        hidden_size: int,
        output: str,
    ) -> Union[NodeProto, None]:
        """Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        """
        if hidden_size > 0 and (hidden_size % num_heads) != 0:
            logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}")
            return None

        q_weight = self.model.get_initializer(q_matmul.input[1])
        k_weight = self.model.get_initializer(k_matmul.input[1])
        v_weight = self.model.get_initializer(v_matmul.input[1])
        if not (q_weight and k_weight and v_weight):
            return None

        qw = NumpyHelper.to_array(q_weight)
        kw = NumpyHelper.to_array(k_weight)
        vw = NumpyHelper.to_array(v_weight)
        logger.debug(f"qw={qw.shape} kw={kw.shape} vw={vw.shape} hidden_size={hidden_size}")

        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        attention_inputs = [
            q_add.output[0],
            k_add.output[0],
            v_add.output[0],
        ]

        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=attention_inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("cross attention")
        self.increase_counter(counter_name)
        return attention_node

    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
        if self.fuse_sam_encoder_pattern(normalize_node, input_name_to_nodes, output_name_to_node):
            return

        match_qkv = self.match_attention_subgraph(normalize_node)
        if match_qkv is None:
            if normalize_node.input[0] not in output_name_to_node:
                return
            skip_add = output_name_to_node[normalize_node.input[0]]
            if skip_add.op_type != "Add":
                return
            match_qkv = self.match_attention_subgraph(skip_add)
            if match_qkv is None:
                return

        reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v = match_qkv

        attention_last_node = reshape_qkv

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q, normalize_node, False)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return

        new_node = self.create_attention_node(
            matmul_q,
            add_q,
            matmul_k,
            add_k,
            matmul_v,
            add_v,
            q_num_heads,
            q_hidden_size,
            output=attention_last_node.output[0],
        )
        if new_node is None:
            return

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([attention_last_node, transpose_qkv])

        # Use prune_graph to remove the remaining nodes of the subgraph since they
        # become unreachable from the graph outputs.
        self.prune_graph = True

    def match_attention_subgraph(self, node_after_output_projection):
        """Match Q, K and V paths exported by PyTorch 2.*"""
        qkv_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [None, None, None, 0, 0],
        )
        if qkv_nodes is None:
            return None

        (_, _, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes

        v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None])
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return None
        (_, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("fuse_attention: failed to match qk path")
            return None

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [0, None, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return None
        (mul_q, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [1, None, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return None
        (_mul_k, _, _, add_k, matmul_k) = k_nodes

        # Both Q and K are scaled by Sqrt(1/Sqrt(head_size)), where head_size is sliced
        # from the shape of the transposed Q. Verify that the scale traces back to the
        # same reshape node of Q.
        mul_q_nodes = self.model.match_parent_path(
            mul_q,
            ["Sqrt", "Div", "Sqrt", "Cast", "Slice", "Shape", "Transpose", "Reshape"],
            [None, 0, 1, 0, 0, 0, 0, 0],
        )
        if mul_q_nodes is None or mul_q_nodes[-1] != reshape_q:
            logger.debug("fuse_attention: failed to match mul_q path")
            return None

        return reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v

    def fuse_sam_encoder_pattern(self, normalize_node, input_name_to_nodes, output_name_to_node) -> bool:
        # Match the nodes between the shortcut Add and the LayerNormalization node.
        # Some encoder layers undo window partitioning with extra Reshape/Transpose
        # nodes before the shortcut Add.
        nodes = self.model.match_parent_path(
            normalize_node,
            ["Add", "Reshape", "Transpose", "Reshape", "Transpose"],
            [None, None, 0, 0, 0],
        )
        if nodes is None:
            nodes = self.model.match_parent_path(
                normalize_node,
                ["Add", "Reshape", "Transpose", "Reshape"],
                [None, None, 0, 0],
            )
            if nodes is None:
                nodes = self.model.match_parent_path(normalize_node, ["Add"], [None])
                if nodes is None:
                    return False

        node_after_output_projection = nodes[-1]
        matched_sdpa = self.match_sam_encoder_attention_subgraph(
            node_after_output_projection, input_index=1 if len(nodes) == 1 else 0
        )
        if matched_sdpa is None:
            return False

        (reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v) = matched_sdpa

        # The output transpose shall have only one consumer (the output reshape).
        if len(self.model.get_children(transpose_out, input_name_to_nodes)) != 1:
            return False

        # Q and V are transposed from BxSxNxH to BxNxSxH, while K is transposed to
        # BxNxHxS for the Q*K' MatMul.
        permutation_q = OnnxModel.get_node_attribute(transpose_q, "perm")
        if not (isinstance(permutation_q, list) and permutation_q == [0, 2, 1, 3]):
            return False

        permutation_k = OnnxModel.get_node_attribute(transpose_k, "perm")
        if not (isinstance(permutation_k, list) and permutation_k == [0, 2, 3, 1]):
            return False

        permutation_v = OnnxModel.get_node_attribute(transpose_v, "perm")
        if not (isinstance(permutation_v, list) and permutation_v == [0, 2, 1, 3]):
            return False

        input_projection_nodes = self.model.match_parent_path(
            split_qkv,
            ["Reshape", "Add", "MatMul"],
            [0, 0, None],
        )
        if input_projection_nodes is None:
            return False
        (reshape_in, _add_in, _matmul_in) = input_projection_nodes

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_in, normalize_node, True)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return False

        # MultiHeadAttention requires query in 3D BxSxD format, so reshape Q from BxSxNxH.
        new_dims_name = "bsnh_to_bsd_reshape_dims"
        new_dims = self.model.get_initializer(new_dims_name)
        if new_dims is None:
            new_dims = numpy_helper.from_array(np.array([0, 0, -1], dtype="int64"), name=new_dims_name)
            self.model.add_initializer(new_dims, self.this_graph_name)

        reshape_q_name = self.model.create_node_name("Reshape")
        reshape_q = helper.make_node(
            "Reshape",
            inputs=[transpose_q.input[0], new_dims_name],
            outputs=[transpose_q.input[0] + "_BSD"],
            name=reshape_q_name,
        )
        self.nodes_to_add.append(reshape_q)
        self.node_name_to_graph_name[reshape_q.name] = self.this_graph_name

        # Repurpose the no longer needed Transpose of Q (perm=[0,2,1,3]) to convert K
        # from BxSxNxH to the BNSH format expected by MultiHeadAttention.
        transpose_k_bnsh = transpose_q
        transpose_k_bnsh.input[0] = transpose_k.input[0]
        transpose_k_bnsh.output[0] = transpose_k.output[0] + "_BNSH"

        logger.debug(f"Found MHA: {q_num_heads=} {q_hidden_size=}")
        new_node = self.create_mha_node(reshape_q, transpose_k_bnsh, transpose_v, q_num_heads)

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        # Route the attention output (BxSxD) to the existing output Reshape, then let
        # prune_graph remove the replaced nodes since they become unreachable.
        reshape_out.input[0] = new_node.output[0]

        self.nodes_to_remove.extend([transpose_out])
        self.prune_graph = True
        return True

    def match_sam_encoder_attention_subgraph(self, node_after_output_projection, input_index: int = 0):
        """Match SDPA pattern in SAM2 encoder."""
        # Match the SDPA output and the output projection:
        #   MatMul(QK, V) -> Transpose(BNSH to BSNH) -> Reshape(to BSD) -> MatMul -> Add
        out_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [input_index, None, 0, 0, 0],
        )
        if out_nodes is None:
            return None
        (_, _, reshape_out, transpose_out, matmul_qk_v) = out_nodes

        # V path: the packed QKV projection is reshaped to 5D, split into Q, K and V,
        # and each part is squeezed back to 4D BxSxNxH.
        v_nodes = self.model.match_parent_path(
            matmul_qk_v, ["Transpose", "Squeeze", "Split", "Reshape"], [1, 0, 0, 0]
        )
        if v_nodes is None:
            logger.debug("failed to match v path")
            return None
        (transpose_v, _squeeze_v, split_qkv, _) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qk_v, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is None:
            logger.debug("failed to match qk path")
            return None
        (_softmax_qk, matmul_qk) = qk_nodes

        # Q path: some layers max-pool Q (for global attention), others use it directly.
        q_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [0, None, 0, 0])
        if q_nodes is None:
            q_nodes = self.model.match_parent_path(
                matmul_qk,
                ["Mul", "Transpose", "Reshape", "MaxPool", "Transpose", "Reshape", "Squeeze", "Split"],
                [0, None, 0, 0, 0, 0, 0, 0],
            )
            if q_nodes is None:
                logger.debug("failed to match q path")
                return None
        if q_nodes[-1] != split_qkv:
            return None
        transpose_q = q_nodes[1]

        k_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [1, None, 0, 0])
        if k_nodes is None:
            logger.debug("failed to match k path")
            return None
        if k_nodes[-1] != split_qkv:
            return None
        (_mul_k, transpose_k, _squeeze_k, _) = k_nodes

        return reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v

    def create_mha_node(
        self, reshape_q: NodeProto, transpose_k: NodeProto, transpose_v: NodeProto, num_heads: int
    ) -> NodeProto:
        """Create a MultiHeadAttention node for SAM2 encoder.

        Args:
            reshape_q (NodeProto): Reshape node for Q, output is 3D BxSxNH format
            transpose_k (NodeProto): Transpose node for K, output is BNSH format
            transpose_v (NodeProto): Transpose node for V, output is BNSH format
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the MultiHeadAttention node created.
        """
        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        inputs = [
            reshape_q.output[0],
            transpose_k.output[0],
            transpose_v.output[0],
        ]

        output = attention_node_name + "_out"
        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("self attention")
        self.increase_counter(counter_name)
        return attention_node