
     NgL                          d dl Z d dlZd dlZd dlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ  G d d	e          Z	  G d
 de          Z	  G d de          Z G d de          ZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                   $     e Zd Z fdZd Z xZS )	QOpMatMulc                 L    t                                          ||           d S Nsuper__init__selfonnx_quantizer	onnx_node	__class__s      e/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/quantization/operators/matmul.pyr   zQOpMatMul.__init__   #    33333    c                 J   | j                             | j                  s$t          j        d| j        j         d           dS | j                             | j        j        d                   sN| j                             | j        j        d                   s$t          j        d| j        j         d           dS | j         j	        r[| j         
                    | j        j        d                   s1t          j        d| j         j         d| j        j         d           dS d	S )
NzIgnore MatMul ]Fr
   r   z&Ignore MatMul due to non float inputs z%Ignore MatMul due to non constant B: [T)	quantizershould_quantize_nodenodeloggingdebugnameis_float_tensorinputinfoq_matmul_const_b_onlyfind_initializer_in_pathgraph_scope)r   s    r   should_quantizezQOpMatMul.should_quantize   s   ~2249== 	M<49><<<===5..tyq/ABB 	..tyq/ABB	 LS$).SSSTTT5 >/ 	>::49?1;MNN sT^E_ssbfbkbpssstttutr   )__name__
__module____qualname__r   r+   __classcell__r   s   @r   r   r      sG        4 4 4 4 4      r   r   c                   $     e Zd Z fdZd Z xZS )MatMulIntegerc                 L    t                                          ||           d S r   r   r   s      r   r   zMatMulInteger.__init__)   r   r   c                    | j         }|j        dk    sJ | j                            |dg          \  }}}}| j                            |dgdd          \  }}}}	|                    |           |                    |           |                    |           |                    |	           |j        d         dz   }
|j        r
|j        dz   nd}t          j	        
                    d	||z   |
g|          }|                    |           |
d
z   }| j                            |j        d         d          }t          j	        
                    d|
g|g|
dz   |          }|                    |           t          |          dk    sJ |r|dz   n|d         dz   |d         z   dz   }t          || j        j                  }|)t!          ||dz   |          }|                    |           |j        d         }d}|r|dz   }|                    t!          ||g|j        d         |                     | j        xj        |z  c_        d S )NMatMulr   r
   Treduce_rangeop_level_per_channel_output_quantized_quant r2   _cast_output)	mandatoryCast_cast)tor   _scales_mul__mulz:0_output_scale_mul)r!   op_typer   quantize_activationquantize_weightextendoutputr$   onnxhelper	make_nodeappendget_tensor_typelenr   	new_nodesr	   )r   r!   quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightmatmul_integer_outputmatmul_integer_namematmul_integer_nodecast_op_outputotype	cast_nodescales_mul_opscales_mul_nodescales_mul_op_outputoutput_scale_mul_ops                       r   quantizezMatMulInteger.quantize,   s   y|x'''' N..taS99	
! N**4!4^b*cc	
(#$$%ABBB 7888-...\""" $A1D D6:iGdi(22R"k33!$44"#	
 
 	())) /?..t{1~.NNK))"#!G+ * 
 
	 	Y ;1$$$$ #@-//Q#%A6? 	 '}dn6NOO"*;8Lm\\OLL))).5a8 ! 	L"58K"K!56A# 	
 	
 	
 	  E)    r   r,   r-   r.   r   rc   r/   r0   s   @r   r2   r2   (   sN        4 4 4 4 4G* G* G* G* G* G* G*r   r2   c                   (     e Zd Z fdZ fdZ xZS )QLinearMatMulc                 L    t                                          ||           d S r   r   r   s      r   r   zQLinearMatMul.__init__|   r   r   c                    | j         }|j        dk    sJ | j                            |dg          \  }}}}| j                            |dgdd          \  }}}}	|                    |           |                    |           |                    |           |                    |	           | j                            |j        d                   \  }
}}}}|
r| t                      	                                S |j        d         t          z   }|j        r
|j        dz   nd}g }|                    |d                    |                    |d                    |                    |d                    |                    |d                    |                    |d                    |                    |d                    |                    |           |                    |           | j        j        t          j        j        t          j        j        t          j        j        t          j        j        hv rdnd}t(          j                            d	||g||
          }|                    |           t/          |j        d         |||t0          j                  }|| j        j        |j        d         <   | j        xj        |z  c_        d S )Nr5   r   r
   Tr6   r:   r;   zcom.microsoftrf   )domain)r!   rE   r   rF   rG   rH   _get_quantization_paramsrI   r   rc   r   r$   rM   weight_qType
onnx_protoTensorProtoFLOAT8E4M3FNFLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZrJ   rK   rL   r   r   Inputquantized_value_maprP   )r   r!   rQ   rR   rS   rT   rU   rV   rW   rX   
data_foundoutput_scale_nameoutput_zp_namerB   qlinear_matmul_outputqlinear_matmul_nameqlinear_matmul_inputsri   qlinear_matmul_nodeq_outputr   s                       r   rc   zQLinearMatMul.quantize   s    y|x'''' N..taS99	
! N**4!4^b*cc	
(#$$%ABBB 7888-...\""" N33DKNCC	
 	&2:77##%%% $A1I I6:iGdi(22R "$$%:1%=>>>$$[^444$$%5a%8999$$%:1%=>>>$$[^444$$%5a%8999$$%6777$$^444 ~*&3&5&1&5	  O  	 #k33!"# 4 
 
 	())) "KN!$
 
 >F*4;q>:  E)    r   rd   r0   s   @r   rf   rf   {   sZ        4 4 4 4 4N* N* N* N* N* N* N* N* N*r   rf   c                   $     e Zd Z fdZd Z xZS )	QDQMatMulc                 L    t                                          ||           d S r   r   r   s      r   r   zQDQMatMul.__init__   r   r   c                    | j         }|j        dk    sJ | j        r|j        }nt	          j        |j        |j                  }|D ]}t          || j        j	        
                                          r^| j                            |d|j                  \  }}|r| j                            ||           q| j                            |           | j                            |           d S )Nr5   r
   )default_axisrE   )r!   rE   disable_qdq_for_node_outputr&   	itertoolschainrI   r   r   modelinitializeris_tensor_per_channel"quantize_weight_tensor_per_channelquantize_weight_tensorquantize_activation_tensor)r   r!   nodes_to_iteratetensor_nameis_per_channelchannel_axiss         r   rc   zQDQMatMul.quantize   s   y|x''''+ 	H#z(tz4;GG+ 
	G 
	GKK)=)I)I)K)KLL 	G/3~/S/Sa 0T 0 0, " GNEEkS_````N99+FFFF99+FFFF
	G 
	Gr   rd   r0   s   @r   r}   r}      sN        4 4 4 4 4G G G G G G Gr   r}   )r   r"   rJ   r   rl   quant_utilsr   r   r   r   r	   base_operatorr   qdq_base_operatorr   r   r2   rf   r}    r   r   <module>r      s\         & & & & & & r r r r r r r r r r r r r r , , , , , , . . . . . .    !   .
K* K* K* K* K*I K* K* K*\
R* R* R* R* R*I R* R* R*jG G G G G G G G G Gr   