
    g                         d dl Z d dlmZmZ d dlmZ ddlmZ ddlm	Z	 erddl
mZ d d	lmZmZmZ dd
lmZmZmZ  e            rd dlZ e            rd dlmZ  ej        e          Zd Z G d de          ZdS )    N)TYPE_CHECKINGUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)AnyDictList)is_torch_availableis_torchao_availablelogging)	quantize_c                 h    |                     d          d d         }| }|D ]}|j        |         }|S )N.)split_modules)modelnamemodule_treeparentms        e/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_torchao.pyfind_parentr   )   sA    **S//#2#&KF $ $#M    c                   (    e Zd ZdZdZdZdgZ fdZd Zd Z	ddZ
deeeeef         f         d
eeeeef         f         fdZddZdddddedeeef         d
ef
dZdddddedddeeef         dee         fdZd Zd dZed             Z xZS )!TorchAoHfQuantizerz?
    Quantizer for torchao: https://github.com/pytorch/ao/
    TFtorchaoc                 <     t                      j        |fi | d S N)super__init__)selfquantization_configkwargs	__class__s      r   r%   zTorchAoHfQuantizer.__init__:   s)    ,7777777r   c                 >   t                      st          d          d| _        |                    dd           }t	          |t
                    rKd|                                v sd|                                v r!| j        rt          d          d| _        d S d S d S )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)F
device_mapcpudiskzYou are attempting to perform cpu/disk offload with a pre-quantized torchao model This is not supported yet . Please remove the CPU or disk device from the device_map.T)	r   ImportErroroffloadget
isinstancedictvaluespre_quantized
ValueError)r&   argsr(   r+   s       r   validate_environmentz'TorchAoHfQuantizer.validate_environment=   s    #%% 	ustttZZd33
j$'' 	(
))++++v9J9J9L9L/L/L% ($p  
 $(DLLL	( 	(/L/Lr   c                     | j         j        dk    rX|.|t          j        k    rt                              d| d           |&t                              d           t          j        }|S )Nint4_weight_onlyzSetting torch_dtype to zu for int4_weight_only quantization, but only bfloat16 is supported right now. Please set the torch_dtype to bfloat16.zSetting torch_dtype to torch.bfloat16 for int4_weight_only quantization since only bfloat16 is supported right now. Please set torch_dtype=torch.bfloat16 to remove this warning.)r'   
quant_typetorchbfloat16loggerwarning_once)r&   torch_dtypes     r   update_torch_dtypez%TorchAoHfQuantizer.update_torch_dtypeM   s    #.2DDD&;%.+H+H## qk  q  q  q   "## H   $nr   target_dtypetorch.dtypereturnc                    t          j        t          j                             d                    t          j        d          k    r7ddlm} |j        t          j        t          j        d}|| j	        j
                 S t          d          )N
acceleratez0.19.0r   )CustomDtype)r9   int8_weight_only#int8_dynamic_activation_int8_weightzYou are using `device_map='auto'` on a torchao quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library with `pip install --upgrade accelerate`)r   parse	importlibmetadataaccelerate.utilsrF   INT4r;   int8r'   r:   r5   )r&   rA   rF   map_to_target_dtypes       r   adjust_target_dtypez&TorchAoHfQuantizer.adjust_target_dtypeZ   s    =+33LAABBW]S[E\E\\\444444 %0$4$)J7<z# #
 't'?'JKK5  r   
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z8TorchAoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>m   s"    HHHcc39HHHr   )items)r&   rQ   s     r   adjust_max_memoryz$TorchAoHfQuantizer.adjust_max_memoryk   s'    HHZ5E5E5G5GHHH
r   r   r
   c                     ddl m}  ||          | _        | j        j        $| j                            | j        j                   d S )Nr	   )get_keys_to_not_convert)integrationsr\   modules_to_not_convertr'   extend)r&   r   r(   r\   s       r   $_process_model_before_weight_loadingz7TorchAoHfQuantizer._process_model_before_weight_loadingp   sU    ::::::&=&=e&D&D##:F'..t/G/^___r   param_valueztorch.Tensor
param_name
state_dictc                    |                     dd           }t          fd| j        D                       rdS |dk    r	| j        rdS t	          |          \  }}t          |t          j        j                  o|dk    S )Nparam_devicec              3   2   K   | ]}|d z   v p|k    V  dS )r   NrT   )rU   rV   rb   s     r   	<genexpr>z;TorchAoHfQuantizer.check_quantized_param.<locals>.<genexpr>   s6      ggCc	Z'?SJ->ggggggr   Fr,   weight)	popanyr^   r/   r   r1   r;   nnLinear)	r&   r   ra   rb   rc   r(   re   moduletensor_names	      `     r   check_quantized_paramz(TorchAoHfQuantizer.check_quantized_paramz   s     zz.$77gggg4Kfggggg 	U5U""t|"5 #7uj"I"IFKfeho66TK8<STr   target_deviceztorch.deviceunexpected_keysc                     t          ||          \  }}t          j                            |                              |          |j        |<   t          || j                                                   dS )z
        Each nn.Linear layer that needs to be quantized is processsed here.
        First, we set the value the weight tensor, then we move it to the target device. Finally, we quantize the module.
        )deviceN)	r   r;   rk   	Parameterto_parametersr   r'   get_apply_tensor_subclass)	r&   r   ra   rb   rp   rc   rq   rm   rn   s	            r   create_quantized_paramz)TorchAoHfQuantizer.create_quantized_param   sl     35*EE*/(*<*<[*I*I*L*LTa*L*b*b;'&$2LLNNOOOOOr   c                     dS )z/No process required for torchao quantized modelNrT   )r&   r   s     r   #_process_model_after_weight_loadingz6TorchAoHfQuantizer._process_model_after_weight_loading   s    r   Nc                    |rt                               d           dS t          j        t          j                            d                    t          j        d          k    }|st                               d           |S )Nzetorchao quantized model does not support safe serialization, please set `safe_serialization` to FalseFhuggingface_hubz0.25.0zMtorchao quantized model is only serializable after huggingface_hub >= 0.25.0 )r=   warningr   rI   rJ   rK   )r&   safe_serialization_is_torchao_serializables      r   is_serializablez"TorchAoHfQuantizer.is_serializable   s     	NN;   5#*=1C1K1KL]1^1^#_#_cjcpd
 d
 $
  ( 	lNNjkkk''r   c                 &    ddg}| j         j        |v S )NrG   rH   )r'   r:   )r&   "supported_quant_types_for_trainings     r   is_trainablezTorchAoHfQuantizer.is_trainable   s&     1.
* '26XXXr   )rA   rB   rC   rB   )r   r
   r#   )__name__
__module____qualname____doc__ requires_parameters_quantizationrequires_calibrationrequired_packagesr%   r7   r@   rP   r   strr   intrZ   r`   r   boolro   r   rx   rz   r   propertyr   __classcell__)r)   s   @r   r    r    1   s         (,$ "8 8 8 8 8( ( (      "DeCHo1E,F 4PSUZ[^`c[cUdPdKe    
   U U $U 	U
 cNU 
U U U U(P P $P 	P
 &P cNP cP P P P"  ( ( ( ( Y Y XY Y Y Y Yr   r    )rJ   typingr   r   	packagingr   baser   quantizers_utilsr   modeling_utilsr
   r   r   r   utilsr   r   r   r;   torchao.quantizationr   
get_loggerr   r=   r   r    rT   r   r   <module>r      sa       ' ' ' ' ' ' ' '             2 2 2 2 2 2  1000000 " " " " " " " " " " E E E E E E E E E E  LLL /......		H	%	%  FY FY FY FY FY FY FY FY FY FYr   