
import importlib
from functools import cached_property
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import (
    ACCELERATE_MIN_VERSION,
    is_accelerate_available,
    is_bitsandbytes_available,
    is_torch_available,
    is_torch_xpu_available,
    logging,
)


if is_torch_available():
    import torch

    from ..pytorch_utils import Conv1D


logger = logging.get_logger(__name__)


class Bnb4BitHfQuantizer(HfQuantizer):
    """
    4-bit quantization from bitsandbytes.py quantization method:
        before loading: converts transformer layers into Linear4bit
        during loading: load 16bit weight and pass to the layer object
        after loading: quantizes individual weights in Linear4bit into 4bit at the first .cuda() call
        saving:
            from state dict, as usual; saves weights and `quant_state` components
        loading:
            need to locate `quant_state` components and pass to Param4bit constructor
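
    This quantizer is not instantiated directly; it is selected by `from_pretrained` when a 4-bit
    `BitsAndBytesConfig` is passed. A minimal sketch (the checkpoint name is only a placeholder; a
    CUDA/XPU device plus the `bitsandbytes` and `accelerate` packages are assumed):

    ```python
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
    model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", quantization_config=quantization_config)
    ```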
    TFbitsandbytes
acceleratec                 z     t                      j        |fi | | j        j        | j        j        | _        d S d S N)super__init__quantization_configllm_int8_skip_modulesmodules_to_not_convert)selfr   kwargs	__class__s      f/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_bnb_4bit.pyr   zBnb4BitHfQuantizer.__init__>   sL    ,77777#9E*.*B*XD''' FE    c                 r    t                      st          dt           d          t                      st          d          ddlm} ddlm}  |            } |d           |                    d	d
          s|                    dd
          rt          d          |                    dd           t          t                    r j        j        s fd                                D             }t                                                    dhk    r|rn;d|                                v sd|                                v rt          d          t#          j        t&          j                            d                    t#          j        d          k     rt          d          d S )NzWUsing `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=z'`zrUsing `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`r   )!validate_bnb_backend_availability)'is_bitsandbytes_multi_backend_availableT)raise_exceptionfrom_tfF	from_flaxzConverting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.
device_mapc                 4    i | ]}|j         v||         S  )r!   ).0keyr-   r"   s     r%   
<dictcomp>z;Bnb4BitHfQuantizer.validate_environment.<locals>.<dictcomp>`   s4     * * *),SPTPkEkEkZ_EkEkEkr&   cpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. r   z0.39.0zYou have a version of `bitsandbytes` that is not compatible with 4bit inference and training make sure you have the latest version of `bitsandbytes` installed)r   ImportErrorr   r   integrationsr(   utilsr)   get
ValueError
isinstancedictr    llm_int8_enable_fp32_cpu_offloadkeyssetvaluesr
   parse	importlibmetadata)r"   argsr#   r(   r)   bnb_multibackend_is_enableddevice_map_without_lm_headr-   s   `      @r%   validate_environmentz'Bnb4BitHfQuantizer.validate_environmentD   s0   &(( 	 E  kA  E  E  E   )** 	 E   	EDDDDDCCCCCC&M&M&O&O#))$????::i'' 	6::k5+I+I 	;  
 ZZd33
":t,, #,M #* * * * *0:0A0A* * *& :$$&&''E7227R24;;====KeKlKlKnKnAnAn )   =+33NCCDDw}U]G^G^^^U   _^r&   target_dtypetorch.dtypereturnc                    t          j        t          j                             d                    t          j        d          k    r7ddlm} |t          j        k    rt          	                    d           |j
        S t          d          )Nr   z0.19.0r   )CustomDtypezXtarget_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantizationaU  You are using `device_map='auto'` on a 4bit loaded version of the model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source to support fp4 auto device mapcalculation. You may encounter unexpected behavior, or pass your own device map)r
   r@   rA   rB   accelerate.utilsrK   torchint8loggerinfoINT4r9   )r"   rG   rK   s      r%   adjust_target_dtypez&Bnb4BitHfQuantizer.adjust_target_dtypeu   s    =+33LAABBW]S[E\E\\\444444uz))vwww##b  r&   modelr   param_valueztorch.Tensor
param_name
state_dictc                     dd l }t          ||          \  }}t          |j                            |d           |j        j                  rdS t          ||j        j                  r|dk    rdS dS )Nr   TbiasF)r   r   r:   _parametersr8   nn
Params4bit
Linear4bit)	r"   rS   rT   rU   rV   r#   bnbmoduletensor_names	            r%   check_quantized_paramz(Bnb4BitHfQuantizer.check_quantized_param   s     	#"""25*EEf(,,[$??ARSS 	4 122 	{f7L7L 45r&   Ntarget_deviceztorch.deviceunexpected_keysc                    ddl }t          ||          \  }}	|	|j        vrt          | d|	 d          t	          ||	          }
|	dk    r_||
                    |          }n|                    |          }t          j                            ||
j	                  }||j        |	<   dS t          |j        |	         |j        j                  st          d          |
j        t          j        d          k    r/|dt          j        d          fvr|t          |	 d	| d          | j        r| j        st          d
          |dz   |vr|dz   |vrt          d| d          i }|                                D ],\  }}|dz   |v r |||<   |||v r|                    |           -i }| j        r||d<    |j        j        j        d||d|d|}ne|                    d          }t'          |j        t*                    r|j        }|
j        } |j        j        |fddi|                    |          }||j        |	<   dS )z

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: Dict[str, Any],
        unexpected_keys: Optional[List[str]] = None,
    ):
        """
        Combines logic from `_load_state_dict_into_meta_model` and
        `.integrations.bitsandbytes.py::set_module_quantized_tensor_to_device()`.
        """
        import bitsandbytes as bnb

        module, tensor_name = get_module_from_name(model, param_name)

        if tensor_name not in module._parameters:
            raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")

        old_value = getattr(module, tensor_name)

        if tensor_name == "bias":
            if param_value is None:
                new_value = old_value.to(target_device)
            else:
                new_value = param_value.to(target_device)

            new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad)
            module._parameters[tensor_name] = new_value
            return

        if not isinstance(module._parameters[tensor_name], bnb.nn.Params4bit):
            raise ValueError("this function only loads `Linear4bit components`")
        if (
            old_value.device == torch.device("meta")
            and target_device not in ["meta", torch.device("meta")]
            and param_value is None
        ):
            raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {target_device}.")

        if self.pre_quantized:
            # 4-bit loading: collect the serialized components needed to restore the quantized weight
            if not self.is_serializable():
                raise ValueError(
                    "Detected int4 weights but the version of bitsandbytes is not compatible with int4 serialization. "
                    "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`."
                )

            if (param_name + ".quant_state.bitsandbytes__fp4" not in state_dict) and (
                param_name + ".quant_state.bitsandbytes__nf4" not in state_dict
            ):
                raise ValueError(
                    f"Supplied state dict for {param_name} does not contain `bitsandbytes__*` and possibly other "
                    "`quantized_stats` components."
                )

            quantized_stats = {}
            for k, v in state_dict.items():
                if param_name + "." in k:
                    quantized_stats[k] = v
                    if unexpected_keys is not None and k in unexpected_keys:
                        unexpected_keys.remove(k)

            param_kwargs = {}
            if self.is_bnb_supports_quant_storage_module:
                param_kwargs["module"] = module

            new_value = bnb.nn.Params4bit.from_prequantized(
                data=param_value,
                quantized_stats=quantized_stats,
                requires_grad=False,
                device=target_device,
                **param_kwargs,
            )
        else:
            new_value = param_value.to("cpu")

            # Models using `Conv1D` in place of `nn.Linear` (e.g. openai-community/gpt2) store the weight
            # transposed, so it is transposed back prior to quantization
            if issubclass(module.source_cls, Conv1D):
                new_value = new_value.T

            kwargs = old_value.__dict__
            new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(target_device)

        module._parameters[tensor_name] = new_value

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        # leave some headroom for the buffers that are created during quantization
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            # force `float16`, which is what `bitsandbytes` requires for 8-bit or 4-bit loading
            logger.info(
                "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to "
                "requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. "
                "Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass "
                "torch_dtype=torch.float16 to remove this warning.",
                torch_dtype,
            )
            torch_dtype = torch.float16
        return torch_dtype

    def update_device_map(self, device_map):
        if device_map is None:
            if torch.cuda.is_available():
                device_map = {"": torch.cuda.current_device()}
            elif is_torch_xpu_available():
                device_map = {"": f"xpu:{torch.xpu.current_device()}"}
            else:
                device_map = {"": "cpu"}
            logger.info(
                "The device_map was not initialized. "
                f"Setting device_map to {device_map}. "
                "If you want to use the model for inference, please set device_map='auto'."
            )
        return device_map

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        device_map,
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        from ..integrations import get_keys_to_not_convert, replace_with_bnb_linear

        llm_int8_enable_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload

        # keep some modules such as the lm_head in their original dtype for numerical stability reasons
        if self.quantization_config.llm_int8_skip_modules is None:
            self.modules_to_not_convert = get_keys_to_not_convert(model)
        else:
            self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules

        if not isinstance(self.modules_to_not_convert, list):
            self.modules_to_not_convert = [self.modules_to_not_convert]

        self.modules_to_not_convert.extend(keep_in_fp32_modules)

        # extend `self.modules_to_not_convert` to the keys that are supposed to be offloaded to `cpu` or `disk`
        if isinstance(device_map, dict) and len(device_map.keys()) > 1:
            keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]

            if len(keys_on_cpu) > 0 and not llm_int8_enable_fp32_cpu_offload:
                raise ValueError(
                    "If you want to offload some keys to `cpu` or `disk`, you need to set "
                    "`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be "
                    "converted to 8-bit but kept in 32-bit."
                )
            self.modules_to_not_convert.extend(keys_on_cpu)

        model = replace_with_bnb_linear(
            model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
        )

        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model.is_loaded_in_4bit = True
        model.is_4bit_serializable = self.is_serializable()
        return model

    def is_serializable(self, safe_serialization=None):
        _is_4bit_serializable = version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse("0.41.3")

        if not _is_4bit_serializable:
            logger.warning(
                "You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version "
                "doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` "
                "installed."
            )
            return False

        return True
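
    # Serialization note (as implied by `create_quantized_param`): a pre-quantized checkpoint stores, next to
    # each packed 4-bit `weight`, extra state-dict entries prefixed with the parameter name, e.g.
    # `<param_name>.quant_state.bitsandbytes__nf4` (or `...bitsandbytes__fp4`) together with the remaining
    # `quantized_stats` components; these are gathered and passed to `bnb.nn.Params4bit.from_prequantized`
    # to rebuild the quantized parameter on the target device.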

    @cached_property
    def is_bnb_supports_quant_storage_module(self) -> bool:
        """
        Determines whether the installed version of bitsandbytes supports the `module` parameter in
        `Params4bit.from_prequantized`.
        """
        return version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse("0.43.3")

    @property
    def is_trainable(self) -> bool:
        return True

    def _dequantize(self, model):
        from ..integrations import dequantize_and_replace

        model = dequantize_and_replace(
            model, self.modules_to_not_convert, quantization_config=self.quantization_config
        )
        return model
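
    # Version gates used by this quantizer (all checked against the installed `bitsandbytes` distribution):
    # 4-bit inference/training needs >= 0.39.0, serializing 4-bit checkpoints needs >= 0.41.3, and passing the
    # `module` argument to `Params4bit.from_prequantized` needs >= 0.43.3.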