import importlib
from typing import TYPE_CHECKING, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_auto_gptq_available, is_optimum_available, is_torch_available, logging
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)

class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ, the quantizer supports calibration of the model through the
    `auto_gptq` package. Quantization is done under the hood for users if they load a non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        from optimum.gptq import GPTQQuantizer

        self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum())

    def validate_environment(self, *args, **kwargs):
        gptq_supports_cpu = version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2")
        if not gptq_supports_cpu and not torch.cuda.is_available():
            raise RuntimeError("GPU is required to quantize or run a quantized model.")
        elif not (is_optimum_available() and is_auto_gptq_available()):
            raise ImportError(
                "Loading a GPTQ quantized model requires optimum (`pip install optimum`) and the auto-gptq library (`pip install auto-gptq`)"
            )
        elif version.parse(importlib.metadata.version("auto_gptq")) < version.parse("0.4.2"):
            raise ImportError("You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq`")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
        elif torch_dtype != torch.float16:
            logger.info("We suggest you set `torch_dtype=torch.float16` for better efficiency with GPTQ.")
        return torch_dtype

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        # For an already-quantized checkpoint, swap in the quantized linear
        # layers before the weights are loaded into them.
        if self.pre_quantized:
            model = self.optimum_quantizer.convert_model(model)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.config.name_or_path

            # Run GPTQ calibration through optimum, then record the resulting
            # config on the model so it round-trips through save/load.
            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return True