
    gF6                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 ddl
mZ erddlmZ ddlmZmZmZmZmZmZ dd	lmZ  e            r
d dlZdd
lmZ  ej        e          Z G d de          ZdS )    N)TYPE_CHECKINGAnyDictListOptionalUnion)version   )HfQuantizer   )PreTrainedModel)ACCELERATE_MIN_VERSIONis_accelerate_availableis_bitsandbytes_availableis_torch_availableis_torch_xpu_availablelogging)get_module_from_name)Conv1Dc                   h    e Zd ZdZdZdZdZddgZ fdZd Z	de
eeeef         f         d	e
eeeef         f         fd
Zd#dZd Zd$dZdddddede
eef         fdZ	 d%dddddeddde
eef         deee                  fdZd&dZg fdddee         fdZd%d Zed	efd!            Zd" Z xZS )'Bnb8BitHfQuantizera  
    8-bit quantization from bitsandbytes quantization method:
        before loading: converts transformer layers into Linear8bitLt during loading: load 16bit weight and pass to the
        layer object after: quantizes individual weights in Linear8bitLt into 8bit at fitst .cuda() call
    saving:
        from state dict, as usual; saves weights and 'SCB' component
    loading:
        need to locate SCB component and pass to the Linear8bitLt object
    TFbitsandbytes
acceleratec                 z     t                      j        |fi | | j        j        | j        j        | _        d S d S N)super__init__quantization_configllm_int8_skip_modulesmodules_to_not_convert)selfr   kwargs	__class__s      f/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_bnb_8bit.pyr   zBnb8BitHfQuantizer.__init__=   sL    ,77777#9E*.*B*XD''' FE    c                 r    t                      st          dt           d          t                      st          d          ddlm} ddlm}  |            } |d           |                    d	d
          s|                    dd
          rt          d          |                    dd           t          t                    r j        j        s fd                                D             }t                                                    dhk    r|rn;d|                                v sd|                                v rt          d          t#          j        t&          j                            d                    t#          j        d          k     rt          d          d S )NzWUsing `bitsandbytes` 8-bit quantization requires Accelerate: `pip install 'accelerate>=z'`zrUsing `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`r   )!validate_bnb_backend_availability)'is_bitsandbytes_multi_backend_availableT)raise_exceptionfrom_tfF	from_flaxzConverting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.
device_mapc                 4    i | ]}|j         v||         S  )r    ).0keyr,   r!   s     r$   
<dictcomp>z;Bnb8BitHfQuantizer.validate_environment.<locals>.<dictcomp>_   s4     * * *),SPTPkEkEkZ_EkEkEkr%   cpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. r   0.37.2zYou have a version of `bitsandbytes` that is not compatible with 8bit inference and training make sure you have the latest version of `bitsandbytes` installed)r   ImportErrorr   r   integrationsr'   utilsr(   get
ValueError
isinstancedictr    llm_int8_enable_fp32_cpu_offloadkeyssetvaluesr	   parse	importlibmetadata)r!   argsr"   r'   r(   bnb_multibackend_is_enableddevice_map_without_lm_headr,   s   `      @r$   validate_environmentz'Bnb8BitHfQuantizer.validate_environmentC   s0   &(( 	 E  kA  E  E  E   )** 	 E   	EDDDDDCCCCCC&M&M&O&O#))$????::i'' 	6::k5+I+I 	;  
 ZZd33
":t,, #,M #* * * * *0:0A0A* * *& :$$&&''E7227R24;;====KeKlKlKnKnAnAn )   =+33NCCDDw}U]G^G^^^U   _^r%   
max_memoryreturnc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g?r.   )r/   r0   vals      r$   r1   z8Bnb8BitHfQuantizer.adjust_max_memory.<locals>.<dictcomp>v   s"    III(#sc3:IIIr%   )items)r!   rG   s     r$   adjust_max_memoryz$Bnb8BitHfQuantizer.adjust_max_memoryt   s'    IIj6F6F6H6HIII
r%   torch_dtypetorch.dtypec                 X    |'t                               d|           t          j        }|S )Na  Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass torch_dtype=torch.float16 to remove this warning.)loggerinfotorchfloat16)r!   rN   s     r$   update_torch_dtypez%Bnb8BitHfQuantizer.update_torch_dtypey   s8    KKE     -Kr%   c                 0   |t           j                                        r!dt           j                                        i}n6t	                      r$ddt           j                                         i}nddi}t                              d| d           |S )N zxpu:r2   z:The device_map was not initialized. Setting device_map to zL. If you want to use the model for inference, please set device_map ='auto' )rS   cudais_availablecurrent_devicer   xpurQ   rR   )r!   r,   s     r$   update_device_mapz$Bnb8BitHfQuantizer.update_device_map   s    z&&(( ) %*";";"="=>

')) ) "E)A)A)C)C"E"EF

 %[
KK])3] ] ]  
 r%   target_dtypec                 n    |t           j        k    rt                              d           t           j        S )NzRtarget_dtype {target_dtype} is replaced by `torch.int8` for 8-bit BnB quantization)rS   int8rQ   rR   )r!   r]   s     r$   adjust_target_dtypez&Bnb8BitHfQuantizer.adjust_target_dtype   s*    5:%%KKlmmmzr%   modelr   param_valueztorch.Tensor
param_name
state_dictc                 x   dd l }t          ||          \  }}t          |j                            |d           |j        j                  ro| j        rf|                    dd          |	                                vrt          d          |j        t          j        k    rt          d|j         d          dS dS )	Nr   weightSCBz$Missing quantization component `SCB`zIncompatible dtype `z@` when loading 8-bit prequantized weight. Expected `torch.int8`.TF)r   r   r:   _parametersr8   nn
Int8Paramspre_quantizedreplacer=   r9   dtyperS   r_   )	r!   ra   rb   rc   rd   r"   bnbmoduletensor_names	            r$   check_quantized_paramz(Bnb8BitHfQuantizer.check_quantized_param   s     	#"""25*EEf(,,[$??ARSS 	! %%h66joo>O>OOO$%KLLL$
22$ C{/@  C  C  C   4ur%   Ntarget_deviceztorch.deviceunexpected_keysc                 <   ddl }|                    dd          }|                    dd          }	|                    |d          }
|                    |	d          }t          ||          \  }}||j        vrt          | d| d          t          ||          }t          |j        |         |j        j	                  st          d| d	          |j
        t          j
        d
          k    r/|d
t          j
        d
          fvr|t          | d| d          |                    d          }| j        r#|                                 st          d          t          |j        t"                    r	|
|j        }|j        } |j        j	        |fddi|                    |          }||j        |<   |
@t)          |j        d|
                    |                     ||                    |           |||                    |	           dS dS dS )z
        combines logic from _load_state_dict_into_meta_model and .integrations.bitsandbytes.py::set_module_quantized_tensor_to_device()
        needs aux items from state dicts, if found - removes them from unexpected_keys
        r   Nrf   rg   weight_formatz- does not have a parameter or a buffer named .zParameter `z0` should only be a `bnb.nn.Int8Params` instance.metaz7 is on the meta device, we need a `value` to put in on r2   zDetected int8 weights but the version of bitsandbytes is not compatible with int8 serialization. Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`.requires_gradF)r   rl   r8   r   rh   r9   getattrr:   ri   rj   devicerS   tork   is_serializable
issubclass
source_clsr   T__dict__setattrrf   remove)r!   ra   rb   rc   rr   rd   rs   rn   fp16_statistics_keyfp16_weights_format_keyfp16_statisticsfp16_weights_formatro   rp   	old_value	new_valuer"   s                    r$   create_quantized_paramz)Bnb8BitHfQuantizer.create_quantized_param   sq    	#"""(005AA","4"4X"O"O$..)<dCC(nn-DdKK25*EEf000ccU`cccdddFK00	&,[936;LMM 	jh;hhhiiiV 4 444fel6.B.B%CCC#ttdqtttuuuNN5))	 	d&:&:&<&< 	q   f'00 	(&%K	#%CF%iOOuOOORRS`aa	*3;'&FM5/*<*<]*K*KLLL*&&':;;; */J""#:;;;;; +*/J/Jr%   c                 F    d|_         |                                 |_        |S )NT)is_loaded_in_8bitr|   is_8bit_serializable)r!   ra   r"   s      r$   #_process_model_after_weight_loadingz6Bnb8BitHfQuantizer._process_model_after_weight_loading   s#    "&%)%9%9%;%;"r%   keep_in_fp32_modulesc                    ddl m}m} | j        j        }| j        j         ||          | _        n| j        j        | _        t          | j        t                    s| j        g| _        | j        	                    |           t          |t                    rt          |                                          dk    r\d |                                D             }t          |          dk    r|st          d          | j        	                    |            ||| j        | j                  }| j        |j        _        d S )Nr   )get_keys_to_not_convertreplace_with_bnb_linearr
   c                      g | ]\  }}|d v 	|S ))r3   r2   r.   )r/   r0   values      r$   
<listcomp>zKBnb8BitHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>  s'    ```:3uP_G_G_3G_G_G_r%   r   zIf you want to offload some keys to `cpu` or `disk`, you need to set `llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be  converted to 8-bit but kept in 32-bit.)r    r   )r6   r   r   r   r<   r   r    r:   listextendr;   lenr=   rL   r9   config)	r!   ra   r,   r   r"   r   r   r<   keys_on_cpus	            r$   $_process_model_before_weight_loadingz7Bnb8BitHfQuantizer._process_model_before_weight_loading   si    	TSSSSSSS+/+C+d( #9A*A*A%*H*HD''*.*B*XD'$5t<< 	H+/+F*GD'#**+?@@@ j$'' 		<C
0A0A,B,BQ,F,F``1A1A1C1C```K;!##,L# >  
 '..{;;;''$*E[_[s
 
 

 ,0+C(((r%   c                     t          j        t          j                             d                    t          j        d          k    }|st                              d           dS dS )Nr   r4   a  You are calling `save_pretrained` to a 8-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 8-bit models, make sure to have `bitsandbytes>0.37.2` installed. You will most likely face errors or unexpected behaviours.FT)r	   r@   rA   rB   rQ   warning)r!   safe_serialization _bnb_supports_8bit_serializations      r$   r|   z"Bnb8BitHfQuantizer.is_serializable  so    +2=9K9S9STb9c9c+d+dgngth
 h
 ,
( 0 	NN*  
 5tr%   c                     t          j        t          j                             d                    t          j        d          k    S )Nr   z0.37.0)r	   r@   rA   rB   )r!   s    r$   is_trainablezBnb8BitHfQuantizer.is_trainable,  s3    }Y/77GGHHGMZbLcLcccr%   c                 B    ddl m}  ||| j        | j                  }|S )Nr   )dequantize_and_replace)r   )r6   r   r    r   )r!   ra   r   s      r$   _dequantizezBnb8BitHfQuantizer._dequantize0  s?    999999&&4.DD\
 
 
 r%   )rN   rO   rH   rO   )r]   rO   rH   rO   r   )ra   r   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr   rF   r   strr   intrM   rU   r\   r`   r   rq   r   r   r   r   r   r|   propertyboolr   r   __classcell__)r#   s   @r$   r   r   ,   sD          $'+$ '6Y Y Y Y Y/ / /bDeCHo1E,F 4PSUZ[^`c[cUdPdKe    
        
  $ 	
 cN   : 04=< =< =< $=< 	=<
 &=< cN=< "$s),=< =< =< =<~    +-	'D 'D 'D #3i	'D 'D 'D 'DR    dd d d d Xd      r%   r   )rA   typingr   r   r   r   r   r   	packagingr	   baser   modeling_utilsr   r7   r   r   r   r   r   r   quantizers_utilsr   rS   pytorch_utilsr   
get_loggerr   rQ   r   r.   r%   r$   <module>r      se       B B B B B B B B B B B B B B B B              1000000                3 2 2 2 2 2  'LLL&&&&&&		H	%	%J J J J J J J J J Jr%   