
     Ngj                         d dl Z d dlmZmZ d dlZd dlZd dlZ	 d dlm	Z	 n# e
$ r dZ	Y nw xY wddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d	 d
          Z G d d          ZdS )    N)AnyDict)to_array_extended   )
TensorData)	ONNXModel)
ONNX_TYPE_TO_NP_TYPETENSOR_NAME_QUANT_SUFFIXfind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   J    e Zd Zdeeef         fdZd	dZd Zd Z	d Z
d ZdS )
QuantizationParamsdatac                 h   i | _         |                                D ]\  }}t          |t                    s#t	          dt          |           d|d          |dk    rJt          |t          t          t          j        f          s#t	          dt          |           d|d          |dk    r7t          |t                    s"| t	          dt          |           d          |dk    r9|j	        t          j
        t          j        fvrt          d|j	         d|          || j         |<   d S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kvs       c/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/quantization/base_quantizer.py__init__zQuantizationParams.__init__%   s@   	JJLL 		 		DAqa%% T RDGG R RA R R RSSSF{{:a#sBJ1G#H#H{ jTXYZT[T[ j jde j j jkkkF{{:a#5#5{!- T$q'' T T TUUUG||
BJ/G G G !nYZY`!n!nij!n!noooDIaLL		 		    Nc                 8    | j                             ||          S N)r   get)r'   keydefault_values      r*   r/   zQuantizationParams.get2   s    y}}S-000r,   c              #   $   K   | j         E d {V  d S r.   r   r'   s    r*   __iter__zQuantizationParams.__iter__5   s&      9r,   c                     | j         |         S r.   r3   )r'   r0   s     r*   __getitem__zQuantizationParams.__getitem__8   s    y~r,   c                     || j         |<   d S r.   r3   )r'   r0   values      r*   __setitem__zQuantizationParams.__setitem__;   s    	#r,   c                 *    t          | j                  S r.   )lenr   r4   s    r*   __len__zQuantizationParams.__len__>   s    49~~r,   r.   )__name__
__module____qualname__r   r   r   r+   r/   r5   r7   r:   r=    r,   r*   r   r   $   s        tCH~    1 1 1 1          r,   r   c                       e Zd Z	 ddZdej        j        defdZd Z	d Z
d Zd	 Zd
 Zd ZddZddZ	 	 ddZd ZdS )BaseQuantizerNc                    t          |          st          |          }d |j        j        D             | _        | j                            d |j        j        D                        | j                            d |j        j        D                        t          |          | _	        || _
        || _        |
r|
ni | _        d| j        v o| j        d         | _        d | _        d| j        v o| j        d         | _        | j                            dd           | _        | j                            dd          | _        | j                            d	          | _        t)          |d
|          | _        t)          |d
|          | _        	 |lt/          t1          d |                                                    r<t5          dt7          d |                                D                        d          || _        || _        || _        |	| _        |                                  | _!        tE          | j                            di                     | _#        d | j	        $                                D             | _%        | j#        &                    | j%        | j        '                                |          \  }}|stQ          |          | j#        )                                | _*        d S )Nc                     i | ]
}|j         |S rA   name).0vis     r*   
<dictcomp>z*BaseQuantizer.__init__.<locals>.<dictcomp>R   s    IIIBBGRIIIr,   c                     i | ]
}|j         |S rA   rF   )rH   ots     r*   rJ   z*BaseQuantizer.__init__.<locals>.<dictcomp>S   s     J J J" J J Jr,   c                     i | ]
}|j         |S rA   rF   )rH   its     r*   rJ   z*BaseQuantizer.__init__.<locals>.<dictcomp>T   s     I I I" I I Ir,   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 .    t          | t                     S r.   )r   r   )ts    r*   <lambda>z(BaseQuantizer.__init__.<locals>.<lambda>w   s    z!Z?X?X;X r,   z(tensors_range contains unexpected types c              3   4   K   | ]}t          |          V  d S r.   )r   )rH   r)   s     r*   	<genexpr>z)BaseQuantizer.__init__.<locals>.<genexpr>y   s(      >g>g1tAww>g>g>g>g>g>gr,   z, not TensorData.TensorQuantOverridesc                     i | ]
}|j         |S rA   rF   )rH   initzers     r*   rJ   z*BaseQuantizer.__init__.<locals>.<dictcomp>   s    [[[wW\7[[[r,   )+r   r   graph
value_infovalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr/   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanymapvaluesr   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr&   get_quant_typestensor_quant_override_qtypes)r'   rc   rd   re   ro   rn   rt   ru   rv   rw   rf   overrides_validoverrides_errs                r*   r+   zBaseQuantizer.__init__C   s    (.. 	B:5AAEII%+2HIII J Ju{7I J J JKKK I Iu{7H I I IJJJu%%
&(.;C]] 22[t7IJZ7[ 	) '4+==q$BTUpBq 	* 261C1G1GHY[_1`1`!'+'9'='=>SUZ'['[$"0445GHH '(8-IY Z Z#L-NN
	 $S1X1XZgZnZnZpZp-q-q)r)r$z3>g>gP]PdPdPfPf>g>g>g;g;gzzz   +!2 0$8!!5577 'AASAWAWXnprAsAs&t&t#[[$*BXBXBZBZ[[[)-)D)M)Mt/44668H*
 *
&  	,]+++,0,G,W,W,Y,Y)))r,   weight_quant_typereturnc                     | j         | j         S |t          j        j        t          j        j        t          j        j        t          j        j        fv S r.   )rj   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r'   r   s     r*   is_weight_symmetricz!BaseQuantizer.is_weight_symmetric   sJ    $0,, !!")	%
 
 	
r,   c                     t           r.   )NotImplementedErrorr4   s    r*   quantize_modelzBaseQuantizer.quantize_model   s    !!r,   c                 X    t          || j                                                  }|d uS r.   )r   rc   r{   )r'   
input_namer{   s      r*   is_input_a_initializerz$BaseQuantizer.is_input_a_initializer   s*    ":tz/E/E/G/GHH$&&r,   c                     | j         S r.   )rd   r4   s    r*   is_per_channelzBaseQuantizer.is_per_channel   s    r,   c                     t          || j                                                  }|)|j        t          j        j        t          j        j        fv S | j        r| j	        dS | j	        
                    |          S )NF)r   rc   r{   	data_typer   r   FLOATFLOAT16rg   rh   is_valid_quantize_weight)r'   weight_nameweights      r*   r   z&BaseQuantizer.is_valid_quantize_weight   sq    k4:+A+A+C+CDD#(8(>@P@X'YYY1 	t{7J5{33K@@@r,   c                     | j         (t          | j                   dk    r|j        | j         vrdS |j        | j        vrdS | j        |j        | j        v rdS dS )Nr   FT)ru   r<   rG   op_typerw   rv   )r'   nodes     r*   should_quantize_nodez"BaseQuantizer.should_quantize_node   sm    ".D*++q00	!7775<t8885 ,d>S1S1S5tr,   c                 d   d | j         j         j        D             }t          |          dk    rt          d          |d         j        }|dk    rt          j        d| d           dS |dk     rt          j        d| d           | j         j         j                            |d                    | j         j         j                            t          j
                            d	d
          g           d
}|dk     r| j        t          j        j        k    rt          j        d| d           | j         j         j                            |d                    | j         j         j                            t          j
                            d	d          g           d| j         j         _        d}|S )Nc                 6    g | ]}|j         r|j         d k    |S )zai.onnx)domain)rH   opsets     r*   
<listcomp>z5BaseQuantizer.check_opset_version.<locals>.<listcomp>   s7     
 
 
EL
TYT`dmTmTmETmTmTmr,   r   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )rc   opset_importr<   r&   versionloggingwarningremoveextendr   helpermake_opsetidro   r   r   
ir_version)r'   ai_onnx_domainry   s      r*   rx   z!BaseQuantizer.check_opset_version   s   
 
#z/<
 
 
 ~!##CDDD&q)1BO l}  l  l  l   22O f}  f  f  f   J)001BCCCJ)00$+2J2J2r2R2R1STTTM2$"3t7G7T"T"TO5} 5 5 5  
 J)001BCCCJ)00$+2J2J2r2R2R1STTT*+DJ'Mr,         ?c                 n	   t          || j                                                  }t          |          }|t          z   }| j        t          j        j        k    r	t          j
        |          }|j        t          j        k    rt          j        j        }	n?|j        t          j        k    rt          j        j        }	nt!          d|j         d          |                    t          j                  }
t          j        dg|
j                  }|                    d          }t          j                            |
|          }| j                            |g           d}n||z  |z  }t          j
        |t          j                  t          j
        |t          j                  z  }
|
                                }
t          j        t          j        t          j                  j                  }t          j        t          j        t          j                  j                  }t          j        |
|k               st          j        |
|k              rt=          j        d| d           t          j         |
||                              t          j                  }
t          j
        |
t          j                                      |j!                  }t          j                            ||          }| j                            |g           t          j
        ||j                                      d          }d	}| j        }	|d
z   }t          j                            ||          }| j                            |g           | j        t          j        j        k    r| j        }nt          j        j"        }|dz   }| j        t          j        j        k    r*t          j#        $                    || j        dgdg          }n|j%        dk    rYt          j&        |j'        t          j                                      d          }t          j                            ||          }n#t          j#        $                    ||g dg          }| j                            |g           ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r#   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   rc   r{   r   r
   ro   r   r   r   r!   asarrayr#   r%   r   r$   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxrp   r   r   clipdimsINT32r   make_tensorsizezerosshape)r'   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrT   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datas                           r*   quantize_bias_static_implz'BaseQuantizer.quantize_bias_static_impl   s-    (	4:3I3I3K3KLL)*:;;	'*BB  0 ===:i((DzRZ''!-5

rz))!-3

 uhlhr u u uvvv![[44N1#^-ABBBJ(0044O&*&7&B&B>Sf&g&g#J))+B*CDDDII %|3d:J  Z	DDDrzR\dfdnGoGoGooN+1133N 
28BH#5#5#9::I
28BH#5#5#9::Ivny011 RVNY<V5W5W nynnn    W^Y	JJQQRTRZ[[N :nBHEEEMMN^NcddL&*&7&B&B<Qd&e&e#J))+B*CDDD !j9?KKKSSTVWWO*I*J %8($B!(,(9(D(D_Vo(p(p%
%%'D&EFFF  0 ===+KK*0K!4}!D 0 ===)-)@)@AWY]Yjmnloruqv)w)w&&_q  8J$4BHEEEMMbQQL)-):)E)ElTj)k)k&&)-)@)@AWYdfhkljm)n)n&
%%'A&BCCC  %"
 	
r,   Fc                 2   |j         t          z   }|j         dz   }|j         dz   }t          |          }| j                            |j         i           }	d|	v r|	d         j        }d|	v rd|	v rt          j        |	d         t          |                   }
t          j        |	d                   }t          ||
                                ||
          }t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       nQ|| j        k    r|                     |          n| j        }t)          |
                                ||	                    d
|          |	                    d| j        o|          | j        |	                    d          |	                    d                    \  }
}}t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       |j        }t2          j                            ||g |                    d                                                    }t2          j                            ||g |
                    d                                                    }| j                            ||g           |s| j        t2          j         j!        k    rFt3          j                     }| j        |_        |j"        #                    |j"                   ||_         |
                                $                                %                                |_&        tN          tO          |          }|j(        |j(        k    s*|%                                |%                                k    rrtS          d|j(         d|%                                dd          d|%                                dd          d|j(         dtU          |          dd          d          n|t2          j         j+        t2          j         j,        fv r|j        t          j-        t          j.        fvrtS          d| d          t_          ta          |%                                                    }t2          j                            |||j"        |d          }nkt          j1        |t2          j        2                    |                                        |j"                  }t2          j3        4                    ||          }| j                            |g           |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricre   rminrmaxre   rl   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rG   r
   r   rz   get_per_tensor_overridesrT   r!   r   r	   r   flattenr   r"   r   r#   r$   r%   ro   r   rk   r   r/   re   rl   r   r   r   r   r   tolistrc   r   r   r   r   r   copytobytesraw_datar   r   RuntimeErrorr   r   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r'   r   qTypere   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datas                       r*   quantize_initializer_implz'BaseQuantizer.quantize_initializer_impl1  s    &>>+-[8+
 ,F335NNv{hjNkk?**#L1=Eo%%,/*I*I/,"?G[\aGbcccJH_W566E,UK4G4G4I4I5R\]]Mj"*55\\7\$zJZJZ7\7\\\\ BJ..:3Crz3Q3Q3Q5:#355 4R3Q3QeRZ00RR2RT%[[2R2RRRRR <ADDU;U;U00777[_[wI/<##%%##K;;,00ARAcWcdd#2-11&99-11&990 0 0,J} j"*55\\7\$zJZJZ7\7\\\\ BJ..:3Crz3Q3Q3Q5:#355 4R3Q3QeRZ00RR2RT%[[2R2RRRR& K33JRQVQ^Q^_dQeQeQlQlQnQnoo;227E2zGYGYZ_G`G`GgGgGiGijj
%%'8:J&KLLL  #	B D$4$AAA'+'7'9'9$151B$.$)00===,9$)0=0E0E0G0G0L0L0N0N0V0V0X0X$-$0 ..BCCE{k&7775==??mNcNcNeNe;e;e*H8I H H,4466ss;H HCH==??SVTVSVCWH HdjdpH H%()=%>%>tt%DH H H  
 4+0$2B2HIII &rw.AAA&nnnn   $$6}7L7L7N7N$O$OPP (,{'>'>}eU[U`bmsw'>'x'x$$ "
=@d@dej@k@k l l l t tK! ! (,'8'C'CMS`'a'a$J))+?*@AAAgz11r,   Tc                 @   t          || j                                                  }|t          d|          t	          |          }t          |j                  }t          ||          \  }	}
|	st          d| d| d|           |
}|j        |         }| j        	                    |d|ig          }t          |          }|dk    r||k    rt          d| d	| d
          t          |d         d         |          \  }}|r||k    r%t          d| d| d|d         d          d          d|d         v r|d         d         j
        }|d                             d|                     |                    }|d                             d| j        o|          }g }g }g }t          |j                  }t          |          }d||<   t          |          D ]}|                    ||          }||k     r|nd}||         }d|v rDd|v r?t#          j        |d         t&          |                   }t#          j        |d                   }t)          ||                                ||          }t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       n/t9          |                                |||| j        |                    d          |                    d                    \  }}}t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       |                    |           |                    |           |                    t#          j        |                               |                     	t#          j!        ||          }|tD          z   }|dz   }|dz   } |j#        |         g}!tH          j%        &                    | |j'        |!t#          j(        |          )                                          }"tH          j%        &                    |||!t#          j(        |          )                                          }#| j        *                    |"|#g           |sJ|tH          j+        j,        tH          j+        j-        fv r|j        t"          j.        t"          j/        fvrta          d| d          tc          te          |3                                                    }$tH          j%        &                    ||||$d           }%| j        *                    |%g           nt#          j        |tH          j%        4                    |                                         |j#                  }tH          j5        6                    ||          }%| j        *                    |%g           ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   re   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   rc   r{   r&   r   r<   r   r   rz   get_per_channel_overridesrT   r/   r   re   listrangetaker!   r   r	   r   r   r   r"   r   r#   r$   r%   r   rl   appendr   r   concatenater
   r   r   r   r   r   hstackr   r   r   r   r   r  r  r   r  r   r   r  r   r   )&r'   r   ro   channel_axisre   r  r{   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr  r  r	  zero_scale_shaper  r  r  r  s&                                         r*    quantize_weight_per_channel_implz.BaseQuantizer.quantize_weight_per_channel_impl  s    #;
0F0F0H0HII7EEE'447=))#1,#M#M y 	9+ 9 9, 9 9*69 9  
 !l3'+'B'\'\v|&<%= (] (
 (
$ !$$@ A A A%%*?=*P*PU U U,U U U  
 1??[\]?^_e?fht0u0u-% 	,)F)F_[ _ _(_ _4PQR4STZ4[_ _ _   7:::7:<HTL0377TE]E]^jEkEkll	3A6::>4K\Kmamnn
*,'W]++M**%&\"}%% *	q *	qA&||A|<<*+.C*C*CQQ"&BCY&Z#111lF]6]6]X&=l&KSghtSuvvv
!8!ABB-= "2":":"<"<eZ. .* "*bj99``;`dS]N^N^;`;````$
22z7G2:7U7U7U9z'799 8V7U7U!%44VV6Ve6V6VVVV!.
  I IHd+E&F&FHHI I I I
 AN$,,.. !-#'#6"9"="=f"E"E"9"="=f"E"EA A A=
E#= "*bj99``;`dS]N^N^;`;````$
22z7G2:7U7U7U9z'799 8V7U7U!%44VV6Ve6V6VVVV!.
  I IHd+E&F&FHHI I I "":...e$$$+222:>X3Y3Y3a3abn3o3opppp N+JLYY#&>>- 8+
 (,\:; K33-/?:AVAVA]A]A_A_
 
  ;22\#3RY5O5O5V5V5X5X
 
 	
%%'8:J&KLLL  	F 0 5t7G7MNNN$*27BH2EEE&nnnn   $$67H7P7P7R7R$S$STT (,{'>'>!<QU (? ( ($ 
--/C.DEEEE$&J%+>>|LL% % % '+*++ " (,'8'C'CDUWd'e'e$
--/C.DEEEgz11r,   c                    | j         d S | j                                        D ]b}|j        dv r|                     |          s"t          | j                                        |j        d                            dk    r^|j        d         | j         vs|j        d         | j         vr| j         |j        d                  }t          |t                    s.t          dt          |           d|j        d         d          || j         |j        d         <   |j        dk    r_|                     |          st          t          j        d          t          j        d	          
          | j         |j        d         <   dd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rt   rc   nodesr   r   r<   input_name_to_nodesrb   ra   r   r   r   r   r!   r$   )r'   r   tds      r*   adjust_tensor_rangesz"BaseQuantizer.adjust_tensor_ranges  s   %FJ$$&& 	q 	qD|///0066 tz5577
1FGG1LL:a=(:::dk!nTXTf>f>f'A7!"j11 [#$YtBxx$Y$YdkRSn$Y$Y$YZZZ46"4:a=11**0066 5?rzRU`b`jkn`o`o5p5p5p"4;q>2#	q 	qr,   r.   )r   )FF)TF)r>   r?   r@   r+   r   r   DataTypeboolr   r   r   r   r   r   rx   r   r  r2  r<  rA   r,   r*   rC   rC   B   s$        IZ IZ IZ IZV
T5E5N 
SW 
 
 
 
" " "' ' '     A A A   ! ! !FR
 R
 R
 R
hY2 Y2 Y2 Y2@ L2 L2 L2 L2\q q q q qr,   rC   )r   typingr   r   numpyr!   r   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   rz   r   r   rC   rA   r,   r*   <module>rG     s                    7777777    " ! ! ! ! ! ! ! ! ! ! !                        ? > > > > >       <mq mq mq mq mq mq mq mq mq mqs   ! ++