
    g!                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 ddl
mZ ddlmZ erddlmZ dd	lmZmZmZmZmZ dd
lmZ  e            rd dlZ ej        e          Z G d de          ZdS )    N)TYPE_CHECKINGAnyDictListOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_quanto_availableis_torch_availablelogging)QuantoConfigc            
       r    e Zd ZdZddgZdZdZdef fdZd Z	d	 Z
d
 Zd&dZdee         dedee         fdZdddddedeeef         def
dZdeeeeef         f         deeeeef         f         fdZdddddeddfdZd'dZg fddd ee         fd!Zd" Zed(ded         fd$            Zd(d%Z xZS ))QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                 d     t                      j        |fi | |                                  d S N)super__init__	post_init)selfr   kwargs	__class__s      d/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__3   s9    ,77777    c                 N    | j         j        | j        st          d          dS dS )z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueError)r   s    r"   r   zQuantoHfQuantizer.post_init7   s>     #/;DDV;O   <;;;r#   c                     t                      st                      st          d          t                      st          d          d S )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   r   ImportErrorr   )r   argsr    s      r"   validate_environmentz&QuantoHfQuantizer.validate_environmentA   sb    +-- 	1D1F1F 	z   '(( 	r  	 	r#   c                 F    |ddi}t                               d           |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_maps     r"   update_device_mapz#QuantoHfQuantizer.update_device_mapK   s5    eJKK\  
 r#   torch_dtypetorch.dtypereturnc                 V    |&t                               d           t          j        }|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)r/   r0   torchfloat32)r   r3   s     r"   update_torch_dtypez$QuantoHfQuantizer.update_torch_dtypeU   s(    KKpqqq-Kr#   missing_keysprefixc                    t                      rddlm} n.t                      r t                              d           ddlm} g |                                D ]f\  }}t          ||          rQ|D ]N}||v s	|| d| v r?|	                    d          s*|	                    d          s
                    |           Ogfd|D             S )Nr   QModuleMixinuImporting from quanto will be deprecated in v4.47. Please install optimum-quanto instrad `pip install optimum-quanto`.z.weightz.biasc                     g | ]}|v|	S  rB   ).0knot_missing_keyss     r"   
<listcomp>z9QuantoHfQuantizer.update_missing_keys.<locals>.<listcomp>n   s$    EEEa14D+D+D+D+D+Dr#   )r   optimum.quantor>   r   r/   warning_oncer   named_modules
isinstanceendswithappend)	r   modelr:   r;   r>   namemodulemissingrE   s	           @r"   update_missing_keysz%QuantoHfQuantizer.update_missing_keys[   s-   &(( 	,3333333 "" 	, H   ,+++++!//11 	9 	9LD&&,// 9+ 9 9GDv4I4I4I4I,I,I ' 0 0 ; ; -J ' 0 0 9 9 -J )//888EEEE<EEEEr#   rM   r   param_valueztorch.Tensor
param_name
state_dictc                    t                      rddlm} n.t                      r t                              d           ddlm} |                    dd          }|                    dd          }|M|Kt          |	                                          }	|dk    r$t          |	          dk    r|	dhk    s
|	dd	hk    sd
S t          ||          \  }
}t          |
|          rd|v r|
j         S d
S )z=
        Check if a parameter needs to be quantized.
        r   r=   r?   r1   Nparam_devicer.   r
   diskFweight)r   rG   r>   r   r/   rH   r   getsetvalueslenr   rJ   frozen)r   rM   rR   rS   rT   r    r>   r1   rV   device_map_valuesrO   tensor_names               r"   check_quantized_paramz'QuantoHfQuantizer.check_quantized_paramp   s2    '(( 	,3333333 "" 	, H   ,+++++ZZd33
zz.$77!l&> #J$5$5$7$7 8 8u$$->)?)?!)C)C)eW448IeU[_8\8\ 525*EEfl++ 	K0G0G}$$5r#   
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g?rB   )rC   keyvals      r"   
<dictcomp>z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>   s"    III(#sc3:IIIr#   )items)r   ra   s     r"   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memory   s'    IIj6F6F6H6HIII
r#   target_deviceztorch.devicec                     ddl m}  |||||           t          ||          \  }}	|                                 d|j        _        dS )ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsrk   r   freezerX   requires_grad)
r   rM   rR   rS   ri   r*   r    rk   rO   _s
             r"   create_quantized_paramz(QuantoHfQuantizer.create_quantized_param   s^     	A@@@@@##E:}kRRR(
;;	&+###r#   target_dtypec                 "   t          j        t          j                             d                    t          j        d          k    r:ddlm} t          j        |j        |j	        |j
        d}|| j        j                 }|S t          d          )Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r	   parse	importlibmetadatarl   rs   r7   rt   FP8INT4INT2r   weightsr'   )r   rq   rs   mappings       r"   adjust_target_dtypez%QuantoHfQuantizer.adjust_target_dtype   s    =+33LAABBW]S[E\E\\\444444 
%/#(#(	 G #4#;#CDLP  r#   keep_in_fp32_modulesc                 L   ddl m}m} | j        j         ||          | _        n| j        j        | _        t          | j        t                    s| j        g| _        | j                            |            ||| j        | j                  \  }}| j        |j        _        d S )Nr   )get_keys_to_not_convertreplace_with_quanto_layers)modules_to_not_convertr   )	integrationsr   r   r   r   rJ   listextendconfig)r   rM   r   r    r   r   ro   s          r"   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loading   s     	WVVVVVVV #:B*A*A%*H*HD''*.*B*YD'$5t<< 	H+/+F*GD'#**+?@@@--$*E[_[s
 
 
q ,0+C(((r#   c                     |S r   rB   r   rM   s     r"   #_process_model_after_weight_loadingz5QuantoHfQuantizer._process_model_after_weight_loading   s    r#   Nc                     dS )NTrB   r   s     r"   is_trainablezQuantoHfQuantizer.is_trainable   s    tr#   c                     dS )NFrB   )r   safe_serializations     r"   is_serializablez!QuantoHfQuantizer.is_serializable   s    ur#   )r3   r4   r5   r4   )rq   r4   r5   r4   r   ) __name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r+   r2   r9   r   strrQ   r   r   boolr`   r   intrh   rp   r   r   r   propertyr   r   r   __classcell__)r!   s   @r"   r   r   *   sZ         "<0'+$ L               FtCy F# FRVWZR[ F F F F*" " $" 	"
 cN" 
" " " "HDeCHo1E,F 4PSUZ[^`c[cUdPdKe    , , $, 	,
 &, , , ,&   ( KMD D&D>B3iD D D D*    (+<"=    X       r#   r   )ry   typingr   r   r   r   r   r   	packagingr	   baser   quantizers_utilsr   modeling_utilsr   utilsr   r   r   r   r   utils.quantization_configr   r7   
get_loggerr   r/   r   rB   r#   r"   <module>r      s[       B B B B B B B B B B B B B B B B             2 2 2 2 2 2  1000000              5 4 4 4 4 4  LLL		H	%	%q q q q q q q q q qr#   