
    g                         d dl Zd dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ dd	lmZ  e            rd dlZ ej        e          Z G d
 de          ZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_auto_awq_availableis_torch_availablelogging)AWQLinearVersionc                   f     e Zd ZdZdZddgZ fdZd Zd Zdd
Z	d Z
ddZed             Z xZS )AwqQuantizerzm
    4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://arxiv.org/abs/2306.00978)
    Tawq
acceleratec                 <     t                      j        |fi | d S N)super__init__)selfquantization_configkwargs	__class__s      a/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.pyr   zAwqQuantizer.__init__-   s)    ,7777777    c                    t                      st          d          t                      st          d          | j        j        t
          j        k    rt	          j        t          j	                            d                    t	          j        d          k     rt          d          |qt          |t                    r^t          j        d          |                                vs%t!          |                                          dk    rt#          d          d S d S d S t          j                                        st          d	          |t(                              d
           d S |Pt          |t                    r=d|                                v sd|                                v rt#          d          d S d S d S )NzPLoading an AWQ quantized model requires auto-awq library (`pip install autoawq`)zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`)autoawqz0.2.6z^To use IPEX backend, you need autoawq>0.6.2. Please install the latest version or from source.cpur   zYou are attempting to load an IPEX version AWQ model with a device_map that contains more than CPU. This is not supported. Please make sure only cpu in the device_map.zaGPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPUzYou have loaded an AWQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.diskzYou are attempting to load an AWQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)r
   ImportErrorr	   r   r   r   IPEXparse	importlibmetadataRuntimeError
isinstancedicttorchdevicevalueslen
ValueErrorcudais_availableloggerwarning_once)r   
device_mapr   s      r   validate_environmentz!AwqQuantizer.validate_environment0   s   $&& 	rpqqq&(( 	omnnn#+/?/DDD}Y/77	BBCCgmT[F\F\\\"t   &z400 '\%((
0A0A0C0CCCs:K\K\K^K^G_G_bcGcGc [  	 '&&&GcGc :**,, "w   !##M     'j$// Uj>O>O>Q>Q5Q5QU[_i_p_p_r_rUrUr$l   (' UrUrr   c                 x    |t           j        }n*|t           j        k    rt                              d           |S )NzQWe suggest you to set `torch_dtype=torch.float16` for better efficiency with AWQ.)r(   float16r/   warning)r   torch_dtypes     r   update_torch_dtypezAwqQuantizer.update_torch_dtypeW   s7    -KKEM))NNnooor   modelr   c                 8   ddl m}m}m}  ||          | _        | j        j        $| j                            | j        j                    ||| j        | j                  \  }} |||j        j                  }|st          
                    d           d S d S )Nr   )get_keys_to_not_convertreplace_quantization_scalesreplace_with_awq_linear)r   modules_to_not_convertzYou are loading an AWQ model but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)integrationsr:   r;   r<   r=   r   extendconfig
model_typer/   r5   )r   r8   r   r:   r;   r<   has_been_replaceds          r   $_process_model_before_weight_loadingz1AwqQuantizer._process_model_before_weight_loading^   s    pppppppppp&=&=e&D&D##:F'..t/G/^___#:#:t'?X\Xs$
 $
 $
   ,+E5<3JKK  	NNy    	 	r   c                     | j         j        rddlm}  ||| j                   }d|_        | j         j        t          j        k    rddlm}  ||| j         j	                  }| j         j        t          j
        k    rddlm}  ||          }d S d S )Nr   )fuse_awq_modulesT)post_init_awq_exllama_modules)post_init_awq_ipex_modules)r   do_fuser>   rE   _awq_is_fusedr   r   EXLLAMArF   exllama_configr!   rG   )r   r8   rE   rF   rG   s        r   #_process_model_after_weight_loadingz0AwqQuantizer._process_model_after_weight_loadingr   s    #+ 	'777777$$UD,DEEE"&E#+/?/GGGDDDDDD11%9Q9`aaE#+/?/DDDAAAAAA..u55EEE EDr   Nc                     | j         j        rt                              d           dS | j         j        t
          j        k    rt                              d           dS dS )Nz5You cannot save an AWQ model that uses fused modules!Fz7You cannot save an AWQ model that uses Exllama backend!T)r   rH   r/   r5   r   r   rJ   )r   safe_serializations     r   is_serializablezAwqQuantizer.is_serializable   sZ    #+ 	NNRSSS5#+/?/GGGNNTUUU5tr   c                     d}t          j        t          j                             d                    t          j        |          k    S )Nz0.2.0r   )r   r"   r#   r$   )r   MIN_AWQ_VERSION_FOR_PEFTs     r   is_trainablezAwqQuantizer.is_trainable   s;     $+ }Y/77	BBCCw}UmGnGnnnr   )r8   r   r   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r2   r7   rC   rL   rO   propertyrR   __classcell__)r   s   @r   r   r   #   s         
  -8 8 8 8 8% % %N     (6 6 6"
 
 
 
 o o Xo o o o or   r   )importlib.metadatar#   typingr   	packagingr   baser   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   r(   
get_loggerrS   r/   r    r   r   <module>rd      s                                1000000 _ _ _ _ _ _ _ _ _ _ _ _ 8 8 8 8 8 8  LLL		H	%	%po po po po po; po po po po por   