
     Ng                         d dl Z d dlZd dlZd dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlm Z   G d	 d
e          Z!dS )    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domain&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                       e Zd Z	 d dZd Zd Zd Zd Zd Zd Z	d	 Z
d!dZd Zd Zd Zd Zd"dZ	 d#dZd Zd$dZd Zd!dZ	 	 	 	 d%dZ	 	 	 	 	 d&dZd'dZ	 	 d(dZd Zd Zd ZdS ))ONNXQuantizerNc                    t          j        | |||||||	|
||           |s| j                                         t	          | j        j                  }d |j        j        D             | _        | j                            d |j        j	        D                        | j                            d |j        j
        D                        t          |          | _        || _        || _        | j        dk    | _        d| j        v o| j        d         | _        g | _        d| _        i | _        | j                            d |j        j	        D                        | j                            d |j        j
        D                        | j        j        j        j        D ]+}| j                            d	 |j	        D                        ,| j        t,          vrt/          d
| j                   |                                 | _        d| _        d| _        d| _        d| _        i | _        | j                                        | _         d S )Nc                     i | ]
}|j         |S  name).0vis     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/quantization/onnx_quantizer.py
<dictcomp>z*ONNXQuantizer.__init__.<locals>.<dictcomp>G   s    MMMMMM    c                     i | ]
}|j         |S r   r   r!   ots     r#   r$   z*ONNXQuantizer.__init__.<locals>.<dictcomp>H   s    $N$N$NRRWb$N$N$Nr%   c                     i | ]
}|j         |S r   r   r!   its     r#   r$   z*ONNXQuantizer.__init__.<locals>.<dictcomp>I   s    $M$M$MRRWb$M$M$Mr%   
   MatMulConstBOnly/c                     i | ]
}|j         d S r   r   r'   s     r#   r$   z*ONNXQuantizer.__init__.<locals>.<dictcomp>U   s    !J!J!J"'1!J!J!Jr%   c                     i | ]
}|j         d S r0   r   r*   s     r#   r$   z*ONNXQuantizer.__init__.<locals>.<dictcomp>V   s    !I!I!I"'1!I!I!Ir%   c                     i | ]}|d S r0   r   )r!   output_names     r#   r$   z*ONNXQuantizer.__init__.<locals>.<dictcomp>X   s    %T%T%Tk1%T%T%Tr%   zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)!r   __init__modelreplace_gemm_with_matmulr   graph
value_infovalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnoder
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr9   per_channelreduce_rangerA   rB   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizerE   rJ   s                 r#   r8   zONNXQuantizer.__init__&   sa    	 	
 	
 	
  	*J//111:4:;KLLEMMek6LMMMD##$N$N5;;M$N$N$NOOO##$M$M5;;L$M$M$MNNN"5))DJ	"&"4r"9%74;M%M%xRVRdewRx"  !J!Ju{7I!J!J!JKKK  !I!Iu{7H!I!I!IJJJJ$*/ 	V 	VD$$%T%T%T%T%TUUUU9,,,IdiIIJJJ#'#E#E#G#G  (H$&E#+"1 $&  &*Z%J%J%L%L"""r%   c                    t           j                            |d| j        j        j                  }t          |           t          || j        | j        | j	        | j
        | j        | j        | j        | j        | j        | j        | j                  }| |_        | j         | d|_        |                                 |j        j        j        S )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        onnx-quantizer)producer_nameopset_importsr.   )onnxhelper
make_modelr9   opset_importr   r   rV   rW   rA   rB   rX   rY   rZ   r[   r\   r]   rE   parentrH   quantize_modelr;   )rU   subgraph	graph_keywarped_modelsub_quantizers        r#   quantize_subgraphzONNXQuantizer.quantize_subgrapho   s     {--***7 . 
 

 	<(((%IK!"!%
 
  $'+'7$E$E$E$E!$$&&&"(..r%   c                    d |j         D             }t          |          dk    r|S |j        r|j        n|j         dt          | j                   }i }|j         D ]}|j        t          j        j        k    r-|j        | 	                    |j
        | d|j                   i}n|j        t          j        j        k    rZg }|j        D ]F}|                    | 	                    || d|j         dt          |                     g           G|j        |i}nt          |          }|                    |           t          j        j        |j        |j        |j        fd|j        i|S )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        c                 z    g | ]8}|j         t          j        j        k    s|j         t          j        j        k    6|9S r   )typerb   AttributeProtoGRAPHGRAPHS)r!   attrs     r#   
<listcomp>z>ONNXQuantizer.quantize_node_with_sub_graph.<locals>.<listcomp>   sG     
 
 
yD/555dFYF`9`9` 9`9`9`r%   r   _node_count_:r    )	attributelenr    op_typerG   ro   rb   rp   rq   rl   grr   graphsextendr   r>   rc   	make_noder@   r?   )	rU   rJ   graph_attrs	node_namekwargsrs   kvvaluerh   s	            r#   quantize_node_with_sub_graphz*ONNXQuantizer.quantize_node_with_sub_graph   s   

 

 
 

 {q  K!%bDII4<0b0bSQUQ_M`M`0b0b	N 	 	DyD/555i!7!79@Z@Zty@Z@Z![![\d1888 $  HLL 22 (#, G Gty G G3u:: G G     i''--MM"{$T\4:t{eeQUQZe^deeer%   c                 b    t          d | j                                        D                       S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c              3   B   K   | ]}|j         d k    p
|j         dk    V  dS )QuantizeLinearDequantizeLinearN)ry   r!   rJ   s     r#   	<genexpr>z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   sH       
 
W[DL,,R@R0R
 
 
 
 
 
r%   )anyr9   nodes)rU   s    r#   has_QDQ_nodeszONNXQuantizer.has_QDQ_nodes   sA      
 
_c_i_o_o_q_q
 
 
 
 
 	
r%   c                     t          || j                                                  dS | j        | j                            |          S dS )NTF)r   r9   initializerrf   find_initializer_in_path)rU   initializer_names     r#   r   z&ONNXQuantizer.find_initializer_in_path   sJ    ($**@*@*B*BCCO4;";778HIIIur%   c                     | j                             |           |D ]&}|j        D ]}| j                            |           'd S N)rG   r|   r?   rT   add)rU   r   rJ   r3   s       r#   add_new_nodeszONNXQuantizer.add_new_nodes   sa    e$$$ 	< 	<D#{ < <*..{;;;;<	< 	<r%   c                    |                                  rt          j        d           | j                                        D ]}| j        r|                     |          }t          | j                  }t          | |          }|
                                 t          |t          | j                            D ]1}| j        |         j        D ]}| j                            |           2|                                  | j                                                            d           | j                                        j                            | j                   | j        N| j                                        \  }}t          |          dk    rt-          dt/          |          z             t0          | j        j        _        t4          | j        j        _        d | j        j        j        D             }|sId | j        D             }	|	r6| j        j        j                                        }
d|
_        t<          |
_        | j        j        S )NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.rJ   r   z0Invalid model with unknown initializers/tensors.c                 2    g | ]}|j         t          k    |S r   )domainr   )r!   opsets     r#   rt   z0ONNXQuantizer.quantize_model.<locals>.<listcomp>   s%    bbbeXaHaHaEHaHaHar%   c                 (    g | ]}|j         d k    |S )zcom.microsoft)r   r   s     r#   rt   z0ONNXQuantizer.quantize_model.<locals>.<listcomp>   s$    ZZZ4;/;Y;Y;Y;Y;Yr%   r   ) r   loggingwarningr9   r   enable_subgraph_quantizationr   rx   rG   r   quantizeranger?   rT   r   _dequantize_outputsr;   
ClearFieldrJ   r|   rf   clean_initializersRuntimeErrorstrr   r`   r   producer_versionre   versionr   r   )rU   rJ   number_of_existing_new_nodesop_quantizerir3   _initializers_not_foundms_opsetms_nodesr   s              r#   rg   zONNXQuantizer.quantize_model   sG    	On  
 J$$&& 
	@ 
	@D0 ?88>>+.t~+>+>(,T488L!!###7T^9L9LMM @ @#'>!#4#; @ @K.22;????@@ 	  """ 	
%%f---
&&t~666 ;(,
(E(E(G(G%A%)**Q.."#UX[\rXsXs#sttt)5
&,7
)bbtz'7'Dbbb 	)ZZZZZH )
(599;; !(zr%   c                     d| j         v r.t          j        d|| j         d                    | j         d         S t          d|d          )NDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)rE   r   infor   rU   tensor_names     r#   _get_default_tensor_typez&ONNXQuantizer._get_default_tensor_type   ss    $"444LV"#67  
 %&9::J J J J
 
 	
r%   Fc                 P   t          || j                                                  }||j        S || j        v rd| j        |         }|j                            d          r=|r*|j        j        j        dk    r| 	                    |          S |j        j        j        S | j
        r| j        |r| 	                    |          S d S | j                            |          }||S | j
        r%| j        r| j                            |          }||S |r| 	                    |          S d S )Ntensor_typer   )r   r9   r   	data_typer=   ro   HasFieldr   	elem_typer   r   rf   is_valid_quantize_weightget_tensor_type)rU   r   	mandatoryweightr"   otyperess          r#   r   zONNXQuantizer.get_tensor_type  sF   k4:+A+A+C+CDD##$***!+.Bw.. 5 F!4!>!!C!C88EEEw*441 	t{7J B44[AAA444[AAL, 	 	+--k::C
 	>00===tr%   c                    |                      |          r|                     |          S || j        v r~| j        |         }|j                            d          r5|j        j        j        t          j        j	        t          j        j
        fv rdS t          j        d|d|j         d           dS | j        r!| j        r| j                            |          S t          j        d|d           dS )	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r=   ro   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   rf   is_float_tensor)rU   r   r"   s      r#   r   zONNXQuantizer.is_float_tensor  s"   &&{33 	>00===$***!+.Bw.. 273F3P&,&.U 4 4 tOr{rrhjhorrr   5, 	< 	<;..{;;;7K 7 7 7	
 	
 	
 ur%   c                     |t           j        j        k    r|                     |||          S |t           j        j        k    r|                     |||          S t          d| d          )a  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            parameter initial_type: type to quantize from
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8rK   )rU   
input_name
nodes_listqTypeinitial_types        r#   &_get_dynamic_input_quantization_paramsz4ONNXQuantizer._get_dynamic_input_quantization_params5  st     J*///CCJPZ\hiiiJ*000DDZQ[]ijjj?u???@@@r%   c                    t           j        j        }|dz   }|dz   }t          j                            d|g|dz   g|d          }|                    |           |dz   }t          j                            d|g|dz   g|d          }	|                    |	           |d	z   }
t          j                            d
|j        d         g|
dz   g|
          }|                    |           |d	z   }t          j                            d
|	j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         |j        d         g|dz   g|          }|                    |           t          j                            | j	        |g t          |          dz  g          }| j                            |           |dz   }t          j                            d|j        d         | j	        g|g|          }|                    |           t          j                            | j        |g dg          }| j                            |           || j        g g fS )az  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLOAT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMaxg       @	scale_DivDiv)r   r   r   rb   rc   r}   appendr?   make_tensorrO   r   r9   add_initializerrQ   )rU   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps                       r#   r   z9ONNXQuantizer._get_dynamic_input_quantization_params_int8D  s    &+ &0$|3+//Lt#$ 0 
 
 	/***$|3+//Lt#$ 0 
 
 	/*** .6"k33#A&' 4'(	
 
 	-...-6"k33#A&' 4'(	
 
 	-...!J.{,, '*,?,Fq,IJD !	
 
 	,'''+11'!%((3./	
 
 	
""?333#k1.. #T%@A	
 
 	.))) 001H%QSVWUXYY
"">222!8"b@@r%   c                    t           j        j        }|dz   }|dz   }|dz   }t          j                            d|g|dz   g|d          }|                    |           |dz   }	t          j                            d	|g|	dz   g|	d          }
|                    |
           t          j                            | j        |g t          |          g          }| j
                            |           t          j                            | j        |g d
g          }| j
                            |           |dz   }t          j                            d|
j        d         |j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         | j        g|g|          }|                    |           |dz   }t          j                            d| j        |j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         |g|dz   g|          }|                    |           |dz   }t          j                            d|j        |dz   g|          }|                    |           |dz   }t          j                            d|j        |g||          }|                    |           ||g g fS )a{  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLAOT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r   g        
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rb   rc   r}   r   r   rN   r   r9   r   rP   r?   )rU   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                            r#   r   z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8  sb    &,%0"]2$|3+//Lt#$ 0 
 
 	/***$|3+//Lt#$ 0 
 
 	/*** "[44(!%(()	
 
 	
""#5666![44T5I<Y[^a]bcc
""#5666 $l2..#A&(>q(ABd"#	
 
 	.)))#l2.."1%t'CD	
 
 	.))) !#44k++!?#9!#<=4 	
 
 	+&&& #44k++"$454 	
 
 	+&&&"%88--g{7IM\`L`Kacpqq-(((!$66{,,V]5IM?\hmr,ss,'''B66r%   c                 "   | j         }||R| j        	|| j        vrt          j        d| d           dS | j        |         }t	          |t
                    s#t          dt          |           d|d          |t          |          dk    rt          d	| d
|           t          j        |d         g          }t          |d         d          r%|d         j        t          j        t          j        fvr(t          dt          |d                    d|          t          j        |d         g          }|j        t          j        k    sJ |d         }ntt          j        |g          }t          j        |g          }| j        |         }d|v r"|d         j        }|                    |          }|j        t          j        k    sJ g }	|dz   }
g }|dz   }t$          j                            |
||	|                                                                          }| j                            |           |j        t          j        k    rt2          j        j        }nA|j        t          j        k    rt2          j        j        }nt          d|j         d|          t$          j                            ||||                    d                                                    }| j                            |           d||
||	fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        Nz$Quantization parameters for tensor:"z" not specified)F r  r  r  Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rY   rM   r   r   
isinstancer   	TypeErrorro   rx   rK   nparrayhasattrr  float32float16float64astyperb   rc   r   raveltolistr9   r   r   r   r   r   reshape)rU   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr  zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales                   r#   _get_quantization_paramsz&ONNXQuantizer._get_quantization_params  s5    / 5'/:TE]3]3]_J___```,,-j9Ff&899 W U4<< U Uj U U UVVV~V!1!1 J3=J JAGJ J  
 !#&*>)? @ @6'?G44 kw8MVXV`bdblUm8m8m !iD4I4I!i!i[e!i!ijjj8VG_$566L%3333$\2OO "- 9 98YK00L-j9F&  w-+22599%3333$}4(*
 +))_.>@Q@W@W@Y@Y@`@`@b@b
 
 	
""7+++++#/5JJ2:--#/7JJc1CccU_ccddd[,,Z[R^RfRfglRmRmRtRtRvRvww

"":...Z+?OOOr%   c           	         |j         |         }|dk    s
J d            |t          z   }|dz   }	|	|d||}}}
n|                     |          \  }
}}}}g }|
r't          j                            d|||g|g|	          }n| j        rdS | j        rF|t          j	        j
        k    r1|dz   }|dz   }t          j                            d	|g|||g|	          }n\|J d
|d| d| d|             |                     ||||          \  }}}}t          j                            d|||g|g|	          }t          |||||          | j        |<   g ||S )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :param initial_type: type of the weight to quantize
        :return: List of newly created nodes in NodeProto format.
        r  z*Cannot access undefined variable in graph._QuantizeLinearNTr   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r@   r	   r.  rb   rc   r}   rB   rD   r   r   r   r   r   rR   )rU   rJ   input_indexr   given_scale_namegiven_zp_namer   r   r3   ql_node_name
data_foundr*  zp_namer   r   qlinear_noder)  zp_shapes                     r#   _get_quantize_input_nodesz'ONNXQuantizer._get_quantize_input_nodes1  s
   " Z,
R!M #;;!$55(}/H/35E}G
JJ484Q4QR\4]4]1J
GQ %	;00 Z1	 LL { t & 5J4J4P+P+P'(2
$}4#{44+L *g6 	    $//h",h h>Ih hSXh haeh h 0// ??
ESXgs?tt#{44$W5 M 	    0>j+Wacjlq/r/r ,%%%%r%   c                 t    || j         v r| j         |         S | j        | j                            |          S d S r   )rR   rf   find_quantized_value)rU   r   s     r#   r=  z"ONNXQuantizer.find_quantized_valuew  sA    111+J77;";33J???tr%         ?c           
         || j         v r| j         |         j        S | j         |         j        }t          || j                                                  }t          |          }|| j         v r| j         |         j        }n8|| j        v r|                     |          \  }	}}	}	}	nt          d| d          t          || j                                                  }
t          |
          }| 
                    ||||          \  }}}}}}|| j         vsJ t          ||||t          j        |j        dk    rdnd||          }|| j         |<   |S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationr   r   N)	node_type
node_qtype)rR   q_namer*  r   r9   r   r   rM   r.  rK   quantize_bias_static_implr   r   Initializersize)rU   	bias_namer   weight_namebetaweight_scale_nameweight_initializerweight_scaler   r   inputscale_initializerinput_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datar@  rA  quantized_values                      r#   quantize_bias_staticz"ONNXQuantizer.quantize_bias_static~  s    000+I6== !4[AL)*;TZ=S=S=U=UVV,-?@@ 111#7
CN4333+/+H+H+T+T(AAqqjjjjkkk!-.>
@V@V@X@X!Y!Y+,BCC **9k<QUVV	
%"  88888(%"* %))AAt!	
 	
 	
 /> +""r%   c                 8    || j         v p|| j        v p|| j        v S )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )r=   rI   rT   r   s     r#   contains_tensorzONNXQuantizer.contains_tensor  s4    
 D,, ;t00;t99	
r%   c           	      :    |                      ||dddd|          S )NFr  rJ   indicesinitializer_use_weight_qTyperW   op_level_per_channelaxisfrom_subgraph_ONNXQuantizer__quantize_inputs)rU   rJ   rX  r\  s       r#   quantize_activationz!ONNXQuantizer.quantize_activation  s4    %%).!&' & 
 
 	
r%   r  c           	      :    |                      ||d||||          S )NTrW  r]  )rU   rJ   rX  rW   rZ  r[  r\  s          r#   quantize_weightzONNXQuantizer.quantize_weight  s6     %%)-%!5' & 
 
 	
r%   Tc           
         g }g }	g }
g }|D ]4}|j         |         }|| j        v r\| j        |         }|                    |j                   |	                    |j                   |
                    |j                   u|s@|
                    d           |                    d           |	                    d           t          || j                                                  }|| j	        r2|r0| 
                    |j        |r| j        n| j        ||          \  }}}n)|                     ||r| j        n| j        |          \  }}}|
                    |           |	                    |           |                    |           |                     |          r| j                            |dz   | j        | j                                                  }||j         |         }|| j        v rj| j        |         }|                    d          sJ d| d            |j                            d          sJ d| d            |j        j        j        }n$|| j        v sJ d	|d
            | j        |         }|                     ||| j        |          }| dS |r|                     |           n|                    |           |d         }|j        dk    r\|
                    |j                   |                    |j         d                    |	                    |j         d                    9|
                    |j        d                    |                    |j        d                    |	                    |j        d                    | j        z| j                            ||g||||d          \  }}}}|
                    |d                    |                    |d                    |	                    |d                    tA          d| d| j!                   |
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r  Nr0  ro   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r2  )NNNNr  r   r      r   T)rY  rW   rZ  r[  r\  z!Invalid tensor name to quantize: z @graph scope)"r@   rR   r   r*  r8  rB  r   r9   r   rV   quantize_weight_per_channelr    rX   rY   quantize_initializerrU  find_node_by_namerG   r;   r=   r   ro   r   r   rI   r;  r   r|   ry   r?   rf   r^  rK   rH   )rU   rJ   rX  rY  rW   rZ  r[  r\  scale_nameszero_point_namesquantized_input_namesr   r3  
node_inputrR  r   q_weight_namer8  r*  r9  r   r<   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s                               r#   __quantize_inputszONNXQuantizer.__quantize_inputs  s   .  "" c	r c	rKK0J T555"&"::"F""?#=>>> ''(?@@@%,,_-CDDD %,,R000""2&&& ''+++&z4:3I3I3K3KLLK&# (< 
 88#(-Id))tOd$	 	%"
 :>9R9R#-Id))tOd$: :6M7J &,,];;; ''000"":....%%j11 ;r#z;;!22DNDJDTDTDVDV     '!%K!8J!T%555%)%5j%A
)226::cc<c*<c<c<cccc)77FFssHsV`HsHsHssss'1'B'L  *T->>>>,* , , ,  ?>>
 (,'8'D+/+I+Ik4+@| ,J , ,( ,3777$ ;**+?@@@@%9:::#7#;L'+;;;)001DEEE&&|'9!'<===$++L,>q,ABBBB)001DQ1GHHH&&|':1'=>>>$++L,?,BCCCC( K11 M1M!-)="& 2  0+& &,,-I!-LMMM""#5a#8999 ''(?(BCCCC !!pZ!p!p^b^n!p!pqqq$&6UJJr%   c                    |j         | j        v r&| j        |j                  }|j        |j        |j        fS |                     ||||          \  }}}t          |j         |||t          j        d          }|| j        |j         <   |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        N)	r    rR   rB  r8  r*  quantize_initializer_implr   r   rD  )	rU   r   r   rW   keep_float_weightrR  rk  r8  r*  s	            r#   re  z"ONNXQuantizer.quantize_initializer\  s     ;$222"6v{CO&'*  .2-K-KE<):.
 .
*w

 )K*
 
 1@ -gz11r%   c                     || j         v r!| j         |         }|j        |j        |j        fS |                     |||||          \  }}}	t          |||	|t          j        d           }|| j         |<   |||	fS r   )rR   rB  r8  r*   quantize_weight_per_channel_implr   r   rD  )
rU   rG  rX   channel_axisrW   rs  rR  rk  r8  r*  s
             r#   rd  z)ONNXQuantizer.quantize_weight_per_channel}  s     $222"6{CO&'*  .2-R-R|\CT.
 .
*w
 )*
 
 1@ -gz11r%   c                 b   || j         v r$|| j        vr| j         |         }t          |j        | j                                                  }| j        j        j        dk    s| j        j        j        dk    r,|*t          j        	                    |          j
        dk    sJ |dz   }| j                            || j        | j                                                  }|9|j        |j        |j        g}t          j                            d||g|          }|S ||j        d         k    sJ dS )a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32 or float16
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        r_   Nr   _DequantizeLinearr   r   )rR   rT   r   r*  r9   r   r`   rb   numpy_helperto_arrayrE  rf  rG   r;   rB  r8  rc   r}   r?   )rU   
value_namerR  
scale_initdqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes           r#   _dequantize_valuezONNXQuantizer._dequantize_value  sE    $2224Ke9e9e"6zBO &o&@$*BXBXBZBZ[[J z-1AAA
 .2BBBzG] (11*==BaGGGG&)<<M J88X\XbXhXhXjXjkkM$#*#.#+#
 #'+"7"7&*}# # '& "]%9!%<<<<<tr%   c                     | j                                         j        D ]8}|                     |j                  }|| j                            |           9dS )z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)r9   r;   r?   r  r    rG   r   )rU   r?   r  s      r#   r   z!ONNXQuantizer._dequantize_outputs  s`     j&&((/ 	7 	7F"44V[AAO*%%o666	7 	7r%   c           	      Z   | j         d S |                                  i }| j         D ]}| j         |         }t          |t                    s#t	          dt          |           d|d          | j                            |i           }| j        }d|v r|d         j	        }d|v rd|v r|d         |d         }}n|t          j        j        k    rt          ||j        d                   \  }}n|                    d	|j        d
                   }|                    d|j        d                   }	|                    d| j                  }
|                    dd          }t%          |||
          \  }}t'          ||	|||
| j                  \  }}t+          |||          ||<   |S )Nr  r  r   )default_valr  r  r  r   rminr   rmax	symmetricrW   F)rW   r  )r  r  r  )rZ   adjust_tensor_rangesr  r   r  ro   tensor_quant_overridesget_per_tensor_overridesrY   r   rb   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rU   rM   r   tdquant_overridesr  zeror  r  r  r  rW   qminqmaxs                 r#   rL   z+ONNXQuantizer.calculate_quantization_params  s   %4!!### - 	w 	wK#K0Bb*-- T R488 R R+ R R RSSS"9RRS^lnRooO.J..,\:F
/))lo.M.M-l;_W=Uet/<<<5j"*Q-PPee&**62>!3DEE&**62>!3DEE+//T=YZZ	.22>5II4Zlfoppp
d.tT4yRVReffe/ATY^ku/v/v/v,,""r%   r   )F)NN)NNN)r>  )FFr  F)TFFr  F)FF)TF)__name__
__module____qualname__r8   rl   r   r   r   r   rg   r   r   r   r   r   r   r.  r;  r=  rS  rU  r_  ra  r^  re  rd  r  r   rL   r   r%   r#   r   r   %   s2        FM FM FM FMR/ / /> f  f  fD
 
 
  < < <+  +  + Z
 
 
"   2  2A A ARA RA RAh\7 \7 \7|9P 9P 9P 9Px aeD& D& D& D&L  /# /# /# /#b
 
 
	
 	
 	
 	
" "
 
 
 
. &*"AK AK AK AKF2 2 2 2L 2 2 2 2@% % %N
7 
7 
7 #  #  #  #  #r%   r   )"r   numpyr  rb   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r   r%   r#   <module>r     s             & & & & & & = = = = = = = = ! ! ! ! ! ! ! ! ! ! ! !                                   $ ( ' ' ' ' 'K# K# K# K# K#M K# K# K# K# K#r%   