from typing import TYPE_CHECKING, Dict, List, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch


logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from the BitNet quantization method:
    before loading, it converts the model's linear layers into BitLinear layers.

    Check out the paper introducing this method: https://arxiv.org/pdf/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]
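
    # BitNet checkpoints ship already quantized to ternary weights, so
    # `requires_calibration = True`: this quantizer only loads pre-quantized
    # models and does not quantize a full-precision model on the fly.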

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make sure the weights"
                " are in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model"
                " on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                " This is not supported. Please remove the CPU or disk device from the device_map."
            )
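
    # For example, device_map={"": "cpu"} or {"model.layers.3": "disk"} is
    # rejected above, while device_map="auto" on a GPU-only host (or an
    # explicit {"": "cuda:0"}) passes validation.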

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        from ..integrations import get_keys_to_not_convert, replace_with_bitnet_linear

        self.modules_to_not_convert = get_keys_to_not_convert(model)

        if self.quantization_config.modules_to_not_convert is not None:
            self.modules_to_not_convert.extend(self.quantization_config.modules_to_not_convert)

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )
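
    # The swap to BitLinear happens *before* the checkpoint is loaded so that
    # the serialized (packed ternary) weights deserialize directly into the
    # BitLinear modules; anything in modules_to_not_convert (typically the
    # lm_head) is left as a regular full-precision layer.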

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        # Leave ~10% of each device's budget free, e.g. for the buffers used
        # when unpacking weights at inference time.
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        # Plan device memory for 8-bit containers: the ternary weights are
        # stored packed, not as full-precision tensors.
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return False
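

# A minimal usage sketch (the checkpoint id below is hypothetical). Any
# checkpoint whose config carries a BitNet `quantization_config` is routed
# through this quantizer automatically, so no manual instantiation is needed:
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "my-org/my-model-bitnet-1.58",  # hypothetical pre-quantized repo
#         device_map="cuda",              # CPU/disk devices fail validate_environment
#     )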