
     Ng                    8   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Z
d dl
mZmZmZ d dl
mZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ 	 d d
lmZ n# e$ r dZY nw xY w	 d dlmZmZ n# e$ r dZdZY nw xY w	 d dl m!Z! n# e$ r dZ!Y nw xY wdZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+i Z,d  e-e          D             Z. G d de          Z/ G d de          Z0 G d de          Z1 G d de          Z2ej        j3         e	j4        d           ej        j5         e	j4        d!          ej        j6         e	j4        d"          ej        j7         e	j4        d#          ej        j8        eej        j9        eej        j:        eiZ;ej        j5         e	j<        d e	j=        $           e	j<        d%e	j=        $          fej        j3         e	j<        d&e	j>        $           e	j<        d'e	j>        $          fej        j7         e	j<        d e	j?        $           e	j<        d(e	j?        $          fej        j6         e	j<        d)e	j@        $           e	j<        d*e	j@        $          fej        j:         e	j<        d e$           e	j<        d+e$          fej        j9         e	j<        d,e$           e	j<        d-e$          fiZAej        j5         e	j<        d e	j=        $           e	j<        d.e	j=        $          fej        j3         e	j<        d/e	j>        $           e	j<        d'e	j>        $          fej        j7         e	j<        d e	j?        $           e	j<        d0e	j?        $          fej        j6         e	j<        d1e	j@        $           e	j<        d*e	j@        $          fiZBej        j5         e	j<        d e	j=        $           e	j<        d'e	j=        $          fej        j3         e	j<        d2e	j>        $           e	j<        d3e	j>        $          fej        j7         e	j<        d e	j?        $           e	j<        d*e	j?        $          fej        j6         e	j<        d4e	j@        $           e	j<        d5e	j@        $          fej        j:         e	j<        d e$           e	j<        d-e$          fej        j9         e	j<        d6e$           e	j<        d7e$          fiZCd8d9d:ZDdd;ZEdd=ZFd> ZG	 	 	 	 dddLZH	 dddNZI	 	 dddWZJddXZKddYZLdd]ZMddaZN G db dc          ZO G dd de          ZP G df dg          ZQdh ZRdi ZSdj ZTdk ZUddpZVdq ZWddsZXdduZYddwZZddyZ[dd|Z\dd}Z]dd~Z^ddZ_ddZ`ddZaddZbddZcddZdddZeddZfddZgddZhdS )    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0zai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                    i | ]@}t          t          t          |          t                    *t          t          |          |AS  )
isinstancegetattrr   int).0ks     `/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/quantization/quant_utils.py
<dictcomp>r$   8   sA    qqqq
SZ[fhiSjSjloHpHpqQ''qqq    c                  2    e Zd ZdZdZd Zed             ZdS )QuantizationModer      c                    | j         S Nnameselfs    r#   __str__zQuantizationMode.__str__C   
    yr%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )r'   KeyError
ValueError)modes    r#   from_stringzQuantizationMode.from_stringF   s7    	#D)) 	 	 	,,	    (N)__name__
__module____qualname__
IntegerOps
QLinearOpsr/   staticmethodr5   r   r%   r#   r'   r'   ?   sH        JJ     \  r%   r'   c                  2    e Zd ZdZdZd Zed             ZdS )QuantizedValueTyper   r(   c                    | j         S r*   r+   r-   s    r#   r/   zQuantizedValueType.__str__R   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )r>   r2   r3   )vs    r#   r5   zQuantizedValueType.from_stringU   s7    	%a(( 	 	 	,,	r6   N)r7   r8   r9   InputInitializerr/   r<   r5   r   r%   r#   r>   r>   N   sH        EK     \  r%   r>   c                  \    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	             Zed
             ZdS )	QuantTyper   r(                  c                    | j         S r*   r+   r-   s    r#   r/   zQuantType.__str__f   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )rE   r2   r3   )ts    r#   r5   zQuantType.from_stringi   s6    	Q< 	 	 	,,	r6   c                   | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j	        S | t           j
        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S t!          d| d          )NzUnexpected value qtype=.)rE   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r3   r-   s    r#   tensor_typezQuantType.tensor_typep   s    9?""##9###$$9$$$%%9###$$9***++9###$$9?""##<4<<<===r%   N)r7   r8   r9   rP   rR   rX   rV   rT   r\   rZ   r/   r<   r5   propertyr^   r   r%   r#   rE   rE   ]   s|        EFMFGEF     \ > > X> > >r%   rE   c                  2    e Zd ZdZdZd Zed             ZdS )QuantFormatr   r(   c                    | j         S r*   r+   r-   s    r#   r/   zQuantFormat.__str__   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )ra   r2   r3   )formats    r#   r5   zQuantFormat.from_string   s7    	v&& 	 	 	,,	r6   N)r7   r8   r9   	QOperatorQDQr/   r<   r5   r   r%   r#   ra   ra      sH        I
C     \  r%   ra   int8uint8int16uint16dtype   i   i  i i     i      ii  ii@   i i @  rG   zero_point_indexc                T   g }t          |          D ]\  }}t          j        t          |          t          j                  r(|                    t          j        |                     nEt          |t          j                  r|                    |           nt          d| d|           || k    rI|d         }|j
        t          j        k    s|j
        t          j        k    rt          d|j
                   t          |          dk    rt          |          n|d         S )Nzarg z is not an array: rt   zzero_point cannot be r(   r   )	enumeratenumpy
issubdtypetypenumberappendarrayr   ndarray	TypeErrorrl   float32float16lentuple)rv   argsnew_argsiarA   s         r#   _check_typer      s   H$ 
C 
C1DGGU\22 	=OOEKNN++++5=)) 	=OOA;1;;;;<<<   Aw%-''17em+C+C A A ABBB!(mma//5???Xa[@r%   c                   | t           v sJ d|  d            | t          j        j        t          j        j        t          j        j        t          j        j        fv rF|dk    rt          d|d          |j        t          j
        k    rt          j        }n:|j        t          j        k    rt          j        }nt          d|j         d          t          t!          t#          dg dgt$          j                            d| g dg          	          t#          d
g ddg          gdt+          d|d           t+          d|d           gt+          d| d           g                    }t-          |          }t/          |                    d ||d          d                   S t           |          }	t3          | dd          \  }
}|t5          |
|          n|
}|t7          ||          n|}t          j        |                    t          j
                  |z                                  |z             }t          j        ||||           t/          |                    |	                    S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rO   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rY   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrl   ry   r   FLOATr   FLOAT16r3   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrl   qminqmaxcliplowcliphigharr_fp32s                  r#   quantize_nparrayr      st   %%%%duddd 	&%%+-)-	   ??%&j[e&j&j&jkkk9%%#)IIY%-''#+II====>>>"Bdk>U>UVbdikmpqor>s>s   .0L0L0LseTT	 *3	4@@*7ItDD (UD99: 
 

  !,,3774sU)C)CDDQGHHH %U+,URWXXX
d$'O#dC...&*&63tT???D=#**U]";";e"C!J!J!L!Lz!YZZ
8WhH====8??511222r%   Fc           	        |dk    s|dk     rt          d| d|           t          j        | t          j        d| j                            } t          j        |t          j        d|j                            }|,t          || t          j        || j                  z             }|r?t          j        t          j        |           t          j        |                    }| } |
 }||k    sJ d|  d|             t          j        || z
  t          j	                  }t          j        |t          j	                  t          j        |t          j	                  z
  }t          j        ||z            }	|	dk    s
J d            |	t          j
        |j                  j        k     r7t          j        d	|j                  }	t          j        d|j                  }
n|rRt          j        t          j        ||z   t          j        d
t          j	                  z            |j                  }
n3t          j        t          j        || |	z  z
            |j                  }
|	                    |j                  }	|
|	gS )a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rk   Nzqmin=z > qmax=z
scale isse      ?g       @)r3   ry   minimumr~   rl   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r#   compute_scale_zpr      sQ   ( axx4!88r^brrlprrsss
 =u{1DJ???@@D=u{1DJ???@@D !4nDJ O O OOPP uy	$@@ww4<<<555t55<<<	TD[	6	6	6B	T	/	/	/%+d%-2X2X2X	XBKR  EA:::|:::u{4:&&+++Ctz222[$*555

 
	Y TD[EK5=,Q,Q,QQRRZ^Zd  JJ U[u1D%E%ETZXXXJTZ((r%   c                &  	 d}| t           vr| t          j        k    rTddlm	 ddlm} |}	fdt          d          D             }t          j	        d |D             t          j
                  }nt          d	|  d
          |t           | <   n| t          j        k    rddlm} |}|t          d|  d          t          j        t           |                    }t          j	        d|          }t          j	        ||z  |j                  }||gS )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )float8e4m3_to_float32r   c                &    g | ]} |          S r   r   )r!   r   r   s     r#   
<listcomp>z+compute_scale_zp_float8.<locals>.<listcomp>J  s%    GGGq//22GGGr%      c                b    g | ],}t          j        |          t          j        |          *|-S r   )ry   isnanisinf)r!   fs     r#   r   z+compute_scale_zp_float8.<locals>.<listcomp>L  s3    TTTqek!nnTU[QR^^TTTTr%   rk   zQuantization to element_type=z not implemented.zUnexpected element_type rO   )FLOAT8_DISTRIBUTIONSr   rY   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangery   r~   r   r3   r   stdrl   )
element_typer   zp_dtyper   
all_valuesvaluesstd_f8zeror   r   s
            @r#   compute_scale_zp_float8r   9  sW    H///;333??????HHHHHH#HGGGGE#JJGGGJ[TTJTTT\a\i  FF \\\\\]]]-3\**	1	1	1DDDDDDB<BBBCCCY+L9::F;q)))DKfCI666E%=r%   datanumpy.ndarray
quant_typeonnx.TensorProto.DataTyper   boolr   r   float | Nonermin_overridermax_overridereturn#tuple[numpy.ndarray, numpy.ndarray]c                   t          | t          j                  s t          dt	          |            d          ||}n%t          |           r|                                 nd}||}n%t          |           r|                                 nd}t          j        || j	                  }t          j        || j	                  }t          j        d| j	                  }	|t          j        k    rJ|rt          d          t          j        |           }
t          ||
          \  }}	t          ||	d	          S |t          j        t          j        t          j        t          j        t          j        t          j        fv rit-          |||
          \  }}t          |           rt/          ||||||          \  }}	nt          j        d|j	                  }t          ||	d	          S t1          d| d          )a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rO   Ng        rk   r   z1Unsupported option reduce_range=True for float 8.r   ru   r   z Unexpected value for quant_type=)r   ry   r   r   r{   r   r   r   r~   rl   r   rY   RuntimeErrorr   r   r   rQ   rS   rW   rU   r]   r[   r   r   r3   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                 r#   compute_data_quant_paramsr   ^  s   * dEM** OMT

MMMNNN  YY/txxzzzC  YY/txxzzzC;t4:...D;t4:...DK4:...E[--- 	TRSSSioo3JDD
E:uqAAAA   -ZQZ[[[
dt99 	: 0tT4Tb c cJQdj999J:uqAAAA
E
EEE
F
FFr%   2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]c                   t          | ||||||          \  }}|t          j        k    rt          || ||          }	t	          |	                    t          j                                                  dz  dk              rxt          j	        |           }
t          d|
                                 d|
                                 d|	                                 d|	                                 d	          |||	fS |t          j        t          j        t          j        t          j        t          j        t          j        fv rt          || ||          }	|||	fS t'          d| d          )al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    rn   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rO   )r   r   rY   r   anyr   ry   rh   ravelr   r   r   r   rQ   rS   rW   rU   r]   r[   r3   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r#   quantize_datar     s   8 2 J ((()%ujII%%ek2288::S@SHII 	mD))GWgkkmm W Ww{{}} W W&4&8&8&:&:W W>L>P>P>R>RW W W   5.00   *%ujII5.00
;5;;;
<
<<r%   weightonnx.TensorProtor   r   axis
int | Nonequant_weight_name
str | Nonec                (   t          |           }d}|%t          ||                                ||          }n|j        |         }t	          |j                  }	d|	|<   g }
t          |          D ]}|                    ||          }||         }||         }t          ||                                ||          }|
                    t          j	        |          
                    |	                     t          j        |
|          }|r|n| j         t           }|t          j        j        k    rAt          j                    }||_        |j                            | j                   ||_        |                                                                                                |_        t0          t1          |          }|j        |j        k    s*|                                |                                k    rrt3          d|j         d|                                dd          d|                                dd          d| j         dt5          |          dd	          d
          n|t          j        j        t          j        j        fv r|j        t          j        t          j        fvrt3          d| d          tA          tC          |                                                    }t          j"        #                    ||| j        |d          }nmt          j"        $                    |          }t          j	        ||          
                    | j                  }t          j%        &                    ||          }|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr(   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rO   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrk   )'tensor_proto_to_arrayr   r   shapelistr   taker}   ry   r   reshapeconcatenater,   TENSOR_NAME_QUANT_SUFFIXr   r   rY   	data_typedimsextendflattencopytobytesraw_datar   r   strr]   r[   rl   rg   rh   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)r   r   r   r   r   r   weight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r#   quantize_onnx_initializerr    s   ( (//K*.M|([5F5F5H5H%Q[\\#)$/K-..T&(#}%% 	l 	lA&++At44L!!HM!+A%5L..00-AS& &" (..u}=S/T/T/\/\]i/j/jkkkk)*EtLL):j%%6;@jPh@j@jMT%222#/11)3&!((555$1!(5(=(=(?(?(D(D(F(F(N(N(P(P%( &&:;;E{k///5==??mF[F[F]F]3]3]"@0A @ @$,,..ss3@ @;@==??3B3;O@ @\b\h@ @ !566tt<@ @ @  
 
(-t/?/EF	F	Fuz5;&???uuuuvvv .}/D/D/F/FGGHH  ${66}jRXR]_jpt6uu==jIIm>JJJRRSYS^__#0;;M=YYr%   c                   | t           j        j        k    rt          d          d}|rt                              |           }n3|r| t          v rt          |          }nt                              |           }|st          d|  d          |\  }}|dk    s|dk     r&t          d| d| d|j	         d	| d
| d|            |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rY   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr3   rl   )r   r   r   qranger   r   s         r#   r   r   (  s#    
&333!"_```F 0,0077	 0u ===.u5$((// xvvvvwwwJD$axx4!8844 4"&4 404
4 4KW4 4"4 4,14 4
 
 	
 Mr%   c                6    t          | ||          \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r#   get_qrange_for_qTyper&  H  s&     )	RRRJD$$;r%   r    ranktuple[bool, int]c                <    | dk     r| |z   n| }|dk    o||k     }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r   )r   r'  	axis_normis_valids       r#   normalize_axisr,  R  s7      $axxtTIA~2)d"2HYr%   src_8bitr  	bytearrayc                "   t          |           }|dk    rt                      S |dz   dz  }t          |          }d}d}||dz
  k     r3| |dz            dz  dz  | |         dz  z  ||<   |dz  }|dz  }||dz
  k     3||k     r| |         dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r(   rF   ro   rH   )r   r.  )r-  	num_elemsdst_sizedstsrc_idst_is         r#   r  r  ^  s     HIA~~{{A!#H
H

CEE )a-

	*S0Q68E?S;PQE


 )a-


 ye_s*E
Jr%   c                       e Zd ZdZg g dfdZdS )QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r*   )	r,   initializerrminsrmaxszero_pointsscalesr   r   r   )
r.   r,   r8  r9  r:  r;  r<  r   r   r   s
             r#   __init__zQuantizedInitializer.__init__  sJ     	&

&	,			r%   r7   r8   r9   __doc__r=  r   r%   r#   r6  r6  |  s=               r%   r6  c                  "    e Zd ZdZ	 	 	 	 ddZdS )QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r*   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r.   r,   new_quantized_namerE  zero_point_namequantized_value_typer   rH  rI  rJ  s
             r#   r=  zQuantizedValue.__init__  sH     "($&.	"$$r%   )NNNNr>  r   r%   r#   rA  rA    s@          % % % % % %r%   rA  c                      e Zd ZdZd ZdS )BiasToQuantizez+
    Represents a bias to be quantized
    c                0    || _         || _        || _        d S r*   )	bias_name
input_nameweight_name)r.   rQ  rR  rS  s       r#   r=  zBiasToQuantize.__init__  s    "$&r%   Nr>  r   r%   r#   rO  rO    s-         ' ' ' ' 'r%   rO  c                   | j         dk    rt          d| j         d          | j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         d	k    r| j        }nl| j         d
k    r| j	        }nY| j         dk    r| j
        }nF| j         dk    r| j        }n3| j         dk    r| j        }n t          d| j         d| j          d          | j        |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r(   rF   rG   rH   rI   rJ   rp      	   r   z has unsupported type rO   )r{   r3   r,   r   r   srM   gfloatsintsstringstensorsgraphs)	attributer   s     r#   attribute_to_kwargr_    sB    ~TinTTTUUU ~	1			1			1			1			1		 	1			1		!	1		!	2		 ]in]]IN]]]^^^NE""r%   c                Z      fd|D             }t          |          dk    r|d         ndS )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    c                *    g | ]}|j         k    |S r   r+   )r!   item	item_names     r#   r   z find_by_name.<locals>.<listcomp>  s%    BBBd49	+A+AT+A+A+Ar%   r   N)r   )rc  	item_listitemss   `  r#   find_by_namerf    s;     CBBBiBBBE5zzA~~5884/r%   c                d    d}t          t          |                    D ]}||         | k    r|}|S )zC
    Helper function to return index of an item in a node list
    rt   )r   r   )	elem_name	elem_listelem_idxr   s       r#   get_elem_indexrk    s@     H3y>>""  Q<9$$HOr%   c                H    t           j                            d| |g|          S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr,   s      r#   get_mul_noderp    s"     ;  $???r%   filenamer   
identifierr
  c                V    | j                             | j        |z   | j        z             S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)rq  rr  s     r#   generate_identified_filenamerx    s(     ?##HMJ$>$PQQQr%   c                   dd l }dd lm} dd l} |j        |j                   t          d           t          |            t          d           t          |           |                    | |d           |                    d           |	                    d           |
                    d	           |                                 d S )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotry   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr}  pltry   s        r#   
apply_plotr    s    JJJ######LLLES[1111	,	$KKK	
	*JJtZdJ+++JJ~JJxII()))HHJJJJJr%   rO   c           	     
   ddl ddl}ddlddlmc mc m} ddlmc mc m} ddl	m
mm t          j        d|              G fddj                  }                    | |          }t#          t$          j                            |d          d	          5 }|                    |           ddd           n# 1 swxY w Y                       d          }|                    d
          }	g }
t1          |                                           D ]:}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}t=          t?          |                    }|	                     |          }|	                     |          }|!                    |	           |"                    |	|           |#                    |	|           |$                    |	          }|
%                    |           <|&                    |	tO          |
                     |
D ]}|	(                    |           |	)                                }|*                    |	           |+                    |	|           |,                    |	          }|	-                    |           |	.                                }t#          t$          j                            |d          d          5 }|                    |           ddd           n# 1 swxY w Y   t$          j/                            dd          dv r|j        0                    |d          }|1                                }te          |          D ]c}|3                    |          }t          j        |4                                           t          j        |5                                           dt#          t$          j                            |d          d	          5 }t1          |                                           D ]}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}|dz   t=          t?          |                    z   }|                    |           |                    d           	 ddd           dS # 1 swxY w Y   dS )z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  "    e Zd Z fdZdS )*write_calibration_table.<locals>.MyEncoderc                \   t          |f          r|                                S t          |j                  r*|                                t	          |j                  ddS t          |          r|j        j        t	          |          dS j        	                    | |          S )Nznumpy.array)r   rl   CLS)r  r   )
r   to_dictr   tolistr
  rl   	__class__r7   JSONEncoderdefault)r.   objr  r  r  jsonnps     r#   r  z2write_calibration_table.<locals>.MyEncoder.default2  s    #
K899 %{{}}$#rz** ] #

s39~~m\\\#011 J"}5CIII#++D#666r%   N)r7   r8   r9   r  )r  r  r  r  r  s   r#   	MyEncoderr  1  sB        	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7r%   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG)r(   1zcalibration.cache 
)6r  flatbuffersry   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwriter~   Buildersortedkeysr  floatr!  rb  r
  r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndr}   TrtTableStartDictVectorr   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrY  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  r  r  r  r  s                           @@@@@r#   write_calibration_tabler  !  s!   
 KKKLLLLLLLLLLLLLLLLLLLLLLLL]]]]]]]]]]L:'8::;;;7 7 7 7 7 7 7 7 7 7 7D$ 7 7 7 

,)
<<I	bgll3 233S	9	9 T

9               88A;;D!!$''GN',,..// ) )"3'>>##(,,y$//446677(,,x..335566
 CKK  '',,))%00
w'''222!!':666((11	i(((($$Wc..A.ABBB# 3 3	''	2222!!##I7###Wi000$$W--INN9
..

C	bgll3 9::D	A	A T

3               
z~~*A..(::%77Q??	''))x 	, 	,A!q))IL)))L**++++ 
bgll3 344c	:	: 
d+002233 		 		C&s+F~~''Hhll9d3388::;;hll8T227799::F #ICKK 0 00EJJuJJt		
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s7   )CCCM))M-0M-&C7U++U/2U/-C6?c                   | dk                         t          j                  }| dk                         t          j                  }|                                }| j        |z
  }|sdS |t          |          z  t          |          z  }|dk     sJ d|||fz              |                      t          j                  }|||z  | |z  z   z  }|dk                                    dk    sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   z"n_zeros=%d, n_nonzeros=%d, eps1=%f)r   ry   r   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s           r#   smooth_distributionr  }  s     Qu}--H6//%-00KllnnG'!J tw%
"3"33D#:::;? ::: 88EM""DC(Nte{222DAI??!!!!Kr%   
model_pathc                    t          j        |                                 d          }|j        j        D ]}t          j        |          r dS dS )NF)load_external_dataT)r   loadas_posixgraphr8  r   uses_external_data)r  model
intializers      r#   model_has_external_datar    sZ    Ij))++FFFEk-  
2:>> 	44	5r%   opt_model_pathc                    t                      }|                                |_        t          j        |_        i }dg|d<   t          |                                 |fddgi|}dS )z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  r  sess_optionkwargs_s        r#   optimize_modelr    sr     !""K+9+B+B+D+DK(+A+RK(F%6$7F !,,..jjH^G_jcijjAAAr%   r  r   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           dS )z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r#   add_pre_process_metadatar    sh    .0CDN :( 	: 	:D!!48TZ"89999K~66666r%   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r#   model_has_pre_process_metadatar    sG     ( 	 	Dx333
FY8Y8Ytt5r%   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           d S )N
onnx.inferr  r  )r  r  r  s      r#   add_infer_metadatar    sh    "$78N 4% 	4 	4A!!15!'"23333K~66666r%   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )Nr  r  TFr
  )r  r  s     r#   model_has_infer_metadatar    sF     % 	 	Au$$4G)G)Gtt5r%   c                ,   t          | d          }t          j                            t	          |           t	          |                     t          j        |                                          }t          |           |                                 |S )Nz	-inferred)	rx  r   shape_inferenceinfer_shapes_pathr
  r  r  r  unlink)r  inferred_model_pathr  s      r#   load_model_with_shape_inferr    s{    6z;OO**3z??C@S<T<TUUUI)224455Eu   Lr%   c                   t          j        d          5 }t          |                              d          }t	          j        | |                                d           t          |          cd d d            S # 1 swxY w Y   d S )Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)tempfileTemporaryDirectoryr   ru  r   
save_modelr  r  )r  quant_tmp_dirr  s      r#   &save_and_reload_model_with_shape_inferr    s    		$L	9	9	9 7]-((11,??
z2244DQQQQ*:667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   AA==BBr8  r   c                    | j         t          j        j        t          j        j        fv rt
          j                            |           S t          d| j	         dt          | j                             )Nz&Only float type is supported. Weights z is )r  r   r   r   r   r   r  to_arrayr3   r,   type_to_name)r8  s    r#   r   r     si    !7!=z?U?] ^^^ ))+666
l1All|T_TiGjll  r%   tensor_namec                    | dz   S )N_QuantizeLinearr   r"  s    r#   add_quant_suffixr&    s    ***r%   c                    | t           z   S r*   )QUANT_INPUT_SUFFIXr%  s    r#   add_quant_input_suffixr)    s    +++r%   c                    | dz   S )N_QuantizeLinear_Outputr   r%  s    r#   add_quant_output_suffixr,    s    111r%   c                    | dz   S )N_DequantizeLinearr   r%  s    r#   add_dequant_suffixr/    s    ,,,r%   c                    | dz   S )N_DequantizeLinear_Inputr   r%  s    r#   add_dequant_input_suffixr2    s    222r%   c                    | t           z   S r*   )DEQUANT_OUTPUT_SUFFIXr%  s    r#   add_dequant_output_suffixr5    s    ...r%   )NN)FN)FNNN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )FF)r   r    r'  r    r   r(  )r-  r  r   r.  )rq  r   rr  r
  r   r   )rO   )r  )r  r   )r  r   r  r   )r  r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r8  r   r   r   )r"  r
  r   r
  )r   r
  )i
__future__r   r  r  r  enumr   pathlibr   ry   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr(  DEQUANT_OP_NAMEr4  r  MODEL_SIZE_THRESHOLDr   r  r!  r'   r>   rE   ra   rQ   rl   rS   rW   rU   rY   r]   r[   r   r~   rh   rg   rj   ri   r#  r"  r   r   r   r   r   r   r   r  r   r&  r,  r  r6  rA  rO  r_  rf  rk  rp  rx  r  r  r  r  r  r  r  r  r  r  r  r   r&  r)  r,  r/  r2  r5  r   r%   r#   <module>rE     s@
   # " " " " "  				                > > > > > > > > > > & & & & & & Q Q Q Q Q Q Q Q Q Q Q Q - - - - - - P P P P P P P P P P@@@@@@@   LLL
?????????   DEEE7777777   
 	 , $2 ' !  qqCC4D4Dqqq    t          #> #> #> #> #> #> #> #>L    $     V!4!4 +%+g"6"6 +%+g"6"6!;5;x#8#8' %   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+d%*"E"E"E{u{SV^c^hGiGiGi!j!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#>#>#>BV[@\@\@\"]+%+b"="="={u{1TX?Y?Y?Y!Z   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+d%*"E"E"E{u{SV^c^hGiGiGi!j!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q	!   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+c"D"D"DkekRT\a\fFgFgFg!h!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#=#=#={u{1TX?Y?Y?Y"Z+%+b"="="={u{1TX?Y?Y?Y!Z  )+ A A A A A 13 13 13 13h< < < <~" " "R #'"&"&;G ;G ;G ;G ;G~ hl:= := := := :=D $(L  L  L  L  L ^   @   	 	 	 	   <       >% % % % % % % %8' ' ' ' ' ' ' '"# "# "#J0 0 0  @ @ @R R R R  $Y Y Y Yx   :   k k k k 7 7 7 7   7 7 7 7      7 7 7 7   + + + +, , , ,2 2 2 2- - - -3 3 3 3/ / / / / /s6   A A%$A%)A2 2	A>=A>B	 	BB