
     Ng
                     ~    d dl Z d dlZd dlZd dlZd dlmZ  e j        e          Zd Z	d Z
d Z G d d          ZdS )    N)Conv1Dc                     | j         j        \  }}t          j                            ||          }| j         j        j                                        |j         _        | j        j        |j        _        |S )N)	weightshapetorchnnLineardataT
contiguousbias)modulein_sizeout_sizelinears       d/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/quantize_helper.py_conv1d_to_linearr      sX    +GXX__Wh//F+-88::FM{'FKM    c                     t                               d           t          | j                  D ]M}| j        |         }t	          |t
                    rt          |          }|| j        |<   >t          |           NdS )zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)loggerdebuglist_modules
isinstancer   r   conv1d_to_linear)modelnamer   r   s       r   r   r      s     LL-...U^$$ % %%ff%% 	%&v..F#)EN4  V$$$$% %r   c                     t          j        |                                 d           t          j                            d          dz  }t          j        d           |S )Nztemp.p   )r   save
state_dictospathgetsizeremove)r   sizes     r   _get_size_of_pytorch_modelr'   '   sM    	Ju!!8,,,7??8$$4DIhKr   c                   J    e Zd Zeej        fd            Zedd            ZdS )QuantizeHelperc                 0   t          |            t          j                            | t          j        j        h|          }t                              dt          |                       t                              dt          |                      |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )dtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   quantizationquantize_dynamicr   r	   r   infor'   )r   r+   quantized_models      r   quantize_torch_modelz#QuantizeHelper.quantize_torch_model/   s     	,==eehoEV^c=dda>XY^>_>_aabbbf9STc9d9dffgggr   Fc                    ddl m} ddlm}  ||          j                            dd           t                              dt          j	        
                    |           dz               || ||dt          j        j        i	           t                              d
|            t                              dt          j	        
                    |          dz              d S )Nr   )Path)r-   T)parentsexist_okz&Size of full precision ONNX model(MB):r   DefaultTensorType)use_external_data_formatextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)pathlibr2   onnxruntime.quantizationr-   parentmkdirr   r.   r"   r#   r$   onnxTensorProtoFLOAT)onnx_model_pathquantized_model_pathr6   r2   r-   s        r   quantize_onnx_modelz"QuantizeHelper.quantize_onnx_model<   s         ======!"")//t/LLLkRW___=]=]_h=ikklll %=.0@0FG		
 	
 	
 	
 	F0DFFGGGkH\8]8]_h8ikklllllr   N)F)__name__
__module____qualname__staticmethodr   qint8r0   rA    r   r   r)   r)   .   s]        */+ 
 
 
 \
 m m m \m m mr   r)   )loggingr"   r<   r   transformers.modeling_utilsr   	getLoggerrB   r   r   r   r'   r)   rG   r   r   <module>rK      s     				   . . . . . .		8	$	$  % % %  m m m m m m m m m mr   