from typing import TYPE_CHECKING, Any, Dict, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)
efd            Z xZS ) EetqHfQuantizera  
    8-bit quantization from EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear during loading: load 16bit weight and pass to the
        layer object after: quantizes individual weights in Linear8bitLt into 8bit at first .cuda() call
    TFeetq

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_eetq_available():
            raise ImportError(
                "Using `eetq` 8-bit quantization requires eetq. "
                "Please install the latest version of eetq from: https://github.com/NetEase-FuXi/EETQ"
            )

        if not is_accelerate_available():
            raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting into 8-bit weights from tf/flax weights is currently not supported, please make"
                " sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load an EETQ model with a device_map that contains a CPU or disk"
                " device. This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info(
                "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to "
                "requirements of `eetq` to enable model loading in 8-bit. "
                "Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass"
                " torch_dtype=torch.float16 to remove this warning.",
                torch_dtype,
            )
        elif torch_dtype != torch.float16:
            logger.info("We suggest setting `torch_dtype=torch.float16` for better efficiency with EETQ.")
        return torch_dtype
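
    # Orientation sketch for the two hooks below: `check_quantized_param` decides,
    # tensor by tensor, whether a checkpoint value must be quantized, and
    # `create_quantized_param` performs the conversion. Conceptually (this is an
    # assumption about eetq's internals, kept only as an illustration; the real
    # `quantize_and_preprocess_weights` also repacks the tensor layout for its
    # W8A16 CUDA kernels), symmetric per-channel int8 quantization reduces to:
    #
    #     weight_scales = weight.abs().max(dim=-1, keepdim=True).values / 127.0
    #     qweight = torch.round(weight / weight_scales).to(torch.int8)
    #
    # so that weight is approximately qweight.float() * weight_scales.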

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ):
        from eetq import EetqLinear

        module, tensor_name = get_module_from_name(model, param_name)

        if isinstance(module, EetqLinear):
            if self.pre_quantized or tensor_name == "bias":
                # Pre-quantized checkpoints and bias tensors are loaded as-is; a
                # non-int8 weight in a pre-quantized checkpoint is inconsistent.
                if tensor_name == "weight" and param_value.dtype != torch.int8:
                    raise ValueError("Expect quantized weights but got an unquantized weight")
                return False
            else:
                # A weight_scale tensor showing up here means the checkpoint is
                # quantized even though `pre_quantized` says otherwise.
                if tensor_name == "weight_scale":
                    raise ValueError("Expect unquantized weights but got a quantized weight_scale")
                # 16-bit weights of an EetqLinear module must be quantized on the fly.
                return True
        return False

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: Dict[str, Any],
        unexpected_keys: Optional[List[str]] = None,
    ):
        """
        Quantizes weights into `qweight` and `weight_scales`.
        """
        from eetq import quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        # Store the packed int8 weight and its scales as buffers on the target
        # EetqLinear module, on the device the weight is being dispatched to.
        module._buffers[tensor_name] = new_value.to(target_device)
        module._buffers["weight_scales"] = weight_scale.to(target_device)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        device_map,
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        from ..integrations import get_keys_to_not_convert, replace_with_eetq_linear

        # Keep numerically sensitive modules (e.g. the lm_head) in full precision.
        self.modules_to_not_convert = get_keys_to_not_convert(model)

        if self.quantization_config.modules_to_not_convert is not None:
            self.modules_to_not_convert.extend(self.quantization_config.modules_to_not_convert)

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
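
# Usage sketch (illustrative, not part of this module): `EetqHfQuantizer` is not
# instantiated by hand; it is selected by the quantizer dispatch when a model is
# loaded with an `EetqConfig`. The checkpoint name below is only an example:
#
#     from transformers import AutoModelForCausalLM, EetqConfig
#
#     quantization_config = EetqConfig("int8")
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-125m",
#         device_map="auto",
#         quantization_config=quantization_config,
#     )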