
    g,                         d dl mZmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ erddlmZ  e            rd d	lmZ  e
            rd d
lZ ej        e          Zd Z G d de          Zd
S )    )TYPE_CHECKINGAnyDictList   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 h    |                     d          d d         }| }|D ]}|j        |         }|S )N.)split_modules)modelnamemodule_treeparentms        a/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parentr   %   sA    **S//#2#&KF $ $#M    c                   `    e Zd ZdZdZdZdZdgZ fdZd Z	ddd	e
e         d
ede
e         fdZddde
e         de
e         de
e         fdZdddddedeeef         def
dZdddddedddeeef         de
e         fdZd Z	 d ddde
e         fdZd!dZd dZedefd            Z xZS )"HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FThqqc                 X     t                      j        |fi | d | _        d| _        d S )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__s      r   r$   zHqqHfQuantizer.__init__9   s9    ,77777$r   c                    t                      st          d          |                    dd          s|                    dd          rt          d          t          j                                        st          d          | j        =d|v r|d         | _        n+t          j	        | _        t                              d           |                    d	d           }t          |t                    rtd
|                                v sd|                                v rt          d          t          t!          |                                                    dk    | _        d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.r%   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r
   ImportErrorget
ValueErrortorchcudais_availableRuntimeErrorr%   float32loggerinfo
isinstancedictvalueslensetr&   )r'   argsr)   r.   s       r   validate_environmentz#HqqHfQuantizer.validate_environment>   s}    "" 	 T   ::i'' 	6::k5+I+I 	;  
 z&&(( 	RPQQQ#&&#)-#8  #(= stttZZd33
j$'' 	I
))++++v9J9J9L9L/L/L h  
 (+3z/@/@/B/B+C+C'D'Dq'H$$$	I 	Ir   r   r   missing_keysprefixreturnc                 ,    | j         rd |D             S |S )Nc                     g | ]}d |v|	S )weight ).0keys     r   
<listcomp>z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>b   s"    IIICHC4G4GC4G4G4Gr   )pre_quantized)r'   r   rB   rC   r)   s        r   update_missing_keysz"HqqHfQuantizer.update_missing_keys^   s)      	 II<IIIIr   expected_keysloaded_keysc                    | j         s|S fdt          |          }t                      r?ddlm} |                                D ]\  }}||_        t                      } ||           |t          |j        j        d                   z  } |d d t          j
        d                                          dhz
  }	t                      }
|D ]2t          fd|D                       r|
                               3||
z  }|D ]bd	z   |v r|                    d	z              n!|                    fd
|	D                        dz   |v r|                    dz              ct          |          S )Nc                     |                                  D ]J\  }}t          |t          j        j                  r|                    |j                    ||           Kd S N)named_childrenr;   r4   nnLinearaddr   )r   layersr   module_find_hqq_quantizable_layerss       r   rY   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersn   sl     % 4 4 6 6 = =ffux88 ,JJv{+++,,VV<<<<= =r   r   	HQQLinearskip_modulesr/   linear_layerquant_configcompute_dtypedevicebiasc              3       K   | ]}|v V  	d S rR   rH   )rI   _modulerJ   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s'      DD'w#~DDDDDDr   z.weightc                      h | ]
}d z   |z   S )r   rH   )rI   _ref_keyrd   s     r   	<setcomp>z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>   s"    $X$X$X(Ws]X%=$X$X$Xr   z.bias)rL   r?   r
   hqq.core.quantizer[   named_modulesr   configr(   r4   float16state_dict_keysanyrV   updatelist)r'   r   rN   rO   new_keysr[   r   rX   _valid_modules	_ref_keys_rm_keysrY   rd   rJ   s              @@@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysg   s    ! 	!  	= 	= 	= 	= 	= }%%  	4333333 !& 3 3 5 5 # #f" !UUN((???c%,"B>"RSSSN "	!EMZ_  o6(+I
 uuH & &DDDD^DDDDD &LL%%% H * 4 4Y&+55LL9!45555OO$X$X$X$Xi$X$X$XYYYW$33LL7!2333H~~r   param_valueztorch.Tensor
param_name
state_dictc                    t                      rddlm} t          ||          \  }}| j        r;t          |t          j        j                  st          ||          o|dk    o|dk    S t          |t          j        j                  o|dk    S )Nr   rZ   rG   rb   )	r
   ri   r[   r   rL   r;   r4   rT   rU   )	r'   r   rv   rw   rx   r)   r[   rX   tensor_names	            r   check_quantized_paramz$HqqHfQuantizer.check_quantized_param   s      	433333325*EE 	SFEHO44U
698U8U *8+*6) feho66R;(;RRr   target_deviceztorch.deviceunexpected_keysc           	         t                      rddlm} t          ||          \  }}	d                    |                    d          dd                   }
t          ||
          }|
                    d          d         }i }|                                D ]E\  }}|
dz   |v r9|||                    d          d         <   |||v r|                    |           F| j	        rt          ||          rdS  |dd| j        |          }|                    |           |j        Ht          |j        t          j                  r)t          j                            |j                  |_        | j        r|                     |          }t)          |||           |`~t          j                                         dS |D ]6}t)          ||t          j                            ||                              7t1          |d          r |||j        | j        |d	          }|j        Ht          |j        t          j                  r)t          j                            |j                  |_        | j        r|                     |          }t)          |||           n-|                    | j        |
          }t)          |||           t          j                                         dS )a  
        Each nn.Linear layer is processsed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rZ   r   Nr   r]   r_   T)r`   ra   del_orig)dtypera   )r
   ri   r[   r   joinr   r   itemsremoverL   r;   r%   load_state_dictrb   r4   TensorrT   	Parameterr&   _patch_layer_for_multigpusetattr__dict__r5   empty_cachehasattrr_   to)r'   r   rv   rw   r|   rx   r}   r[   rX   rz   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerrJ   s                     r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   s     	433333325*EEXXj..s33CRC899
#E:66$$R( $$&& 	. 	.DAqC1$$67!!''#,,r"23".13G3G#**1--- 	&),, %I!%!%"&"2(	  	 %%&7888~)j.V.V)!&!3!3IN!C!C	# F ::9EE	M4333 J""$$$F % 	M 	MCFC!3!34Ec4J!K!KLLLL 6>** 	1!	#".$  I ~)j.V.V)!&!3!3IN!C!C	# F ::9EE	M43333 YYT%5mYLLFM4000
     r   c                 D    t                    d fd_        S )Nc                     t          j        |                    | j                  |                                                                           }| j        
|| j        z  }|S rR   )r4   matmulr   ra   
dequantizetrb   )r'   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device  sP    ,qttDK00$//2C2C2E2E2G2GHHCy$ty Jr   c                      |           S rR   rH   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>  s    &9&9)Q&G&G r   )r   forward)r'   r   r   s    `@r   r   z(HqqHfQuantizer._patch_layer_for_multigpu  s=    +I66		 	 	 HGGGG	r   Nkeep_in_fp32_modulesc                 >    ||ng }t          || j                  }d S )N)r(   )r   r(   )r'   r   r.   r   r)   s        r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading  s2     8L7W33]_ 'u$BZ[[[r   c                 F    d|_         |                                 |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializable)r'   r   r)   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading  s#    !%$($8$8$:$:!r   c                     dS r   rH   )r'   safe_serializations     r   r   zHqqHfQuantizer.is_serializable#  s    tr   c                     dS r   rH   )r'   s    r   is_trainablezHqqHfQuantizer.is_trainable&  s    tr   rR   )r   r   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr$   rA   r   strrM   ru   r   r   boolr{   r   r   r   r   r   propertyr   __classcell__)r*   s   @r   r    r    -   sB          %'+$ % % % % %
I I I@ & 6:3i IL 	c       0&07;Cy0OSTWy0	c0 0 0 0dS S $S 	S
 cNS 
S S S S*T! T! $T! 	T!
 &T! cNT! cT! T! T! T!n
 
 
  +/	\ \ \ #3i	\ \ \ \   
    d    X    r   r    )typingr   r   r   r   integrationsr   utilsr	   r
   r   r   baser   quantizers_utilsr   modeling_utilsr   accelerate.hooksr   r4   
get_loggerr   r9   r   r    rH   r   r   <module>r      s?   2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 Z Z Z Z Z Z Z Z Z Z Z Z       2 2 2 2 2 2  1000000  9888888 LLL		H	%	%  { { { { {[ { { { { {r   