
    קgw                     b   d dl Z d dlZd dlZd dlZd dlZd dlmc mc mZ	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZ d dlm Z m!Z! d dl"m#Z# dd	l$m%Z%m&Z& g d
Z'eZ(ej)        ej*        j)        ej+        ej*        j+        iej*        j)        ej        j)        ej*        j+        ej        j+        idZ,d Z-	 	 	 d#dZ.d$dZ/d Z0d Z1d%dZ2	 	 	 	 d&dZ3d Z4d Z5	 	 	 	 d'dZ6d Z7d Z8d(dZ9dej:        ddfdZ;d(dZ<d%dZ=	 	 	 	 	 	 d)dZ>	 	 	 	 	 d*d Z?	 d%d!Z@d+d"ZAdS ),    N)_FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set)DeQuantStubQuantWrapper)type_before_parametrizations   )get_qparam_dict)has_no_children_ignoring_parametrizations)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      t           S )z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICT     Z/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/ao/quantization/quantize.pyr   r   B   s    &&r(    c                 
   |                     t          |           |          }|                     ||          }t          | d|          }t          j        j        j                            ||            t          ||           }|| _        | 	                                D ]b\  }}|r|dz   |z   n|}	|=||                     dg           v s7t          |          |                     dg           v st          ||||	           cdS )a  This is a helper function for `propagate_qconfig_`

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name of submodule to quantization
                     configuration
        qconfig_parent: quantization config of parent module, we will fallback to
                       this config when there is no specified config for current
                       module
        prefix: corresponding prefix of the current module, used as key in
                qconfig_dict
        prepare_custom_config_dict: dictionary for custom handling of modules
                                    see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr,   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r)   r8   r8   G   s0   2 "%%$V,,n N "%%fn==NVY??N	H!77OOO >~v V V.FN,,.. 
 
e/5?t++4%-.223NPRSSSSE{{)--.JBOOP P &|%>  
 
r(   c                 :    |i }|i }t          | ||           dS )a  Propagate qconfig through the module hierarchy and assign `qconfig`
    attribute on each leaf module

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name or type of submodule to
            quantization configuration, qconfig applies to all submodules of a
            given module unless qconfig for the submodules are specified (when
            the submodule already has qconfig attribute)
        prepare_custom_config_dict: dictionary for custom handling of modules
            see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    N)r=   )r8   )r9   r:   r=   s      r)   r   r   x   sD      !)%'"9S     r(   c                 ,    |                      |          S )z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs      r)   _observer_forward_hookrJ      s    ''///r(   c                 8    |                      |d                   S )z2Forward pre hook that calls observer on the outputr   rE   )rG   rH   s     r)   _observer_forward_pre_hookrL      s    ''a111r(   Fc                     t          | d          s
J d            |r|                     t          d          }d S |                     t          d          }d S )NrF   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrL   register_forward_hookrJ   )r9   pre_hookhandles      r)   &_register_activation_post_process_hookrT      s    )  Q QPQ Q Q  T11& 2 
 
 --.Dd-SSr(   c                    |t                      }|i }`t          |           }t          |          dk    sJ d|             t          |          dk    rt          t	          |                    ndddd dfd	}|                                 D ]\  }}t          |          t          j        fv r#t          t          |          t          j        t          j        f          rM |          rAt          |d          sJ d	t          |           d
             |j                  |_        t!          |t"                    r |          r ||           |(t          |          |v r |          r ||           t%          |          rt'          |          }	 |||	           % |          rzt          |          |v ri|t          |                                       |          }
t+          | ||
           |t          |                   t-                      vr ||
           t/          ||||           t1          |           r;t!          | t2          j        j                  st          |           |v r ||            t          | d          r=t!          | t2          j        j                  s t          |           |v r ||            dS dS dS dS )as  Add observer for the leaf child of the module.

    This function insert observer module to all leaf child module that
    has a valid qconfig attribute.

    Args:
        module: input module with qconfig attributes for all the leaf modules that we want to quantize
        qconfig_propagation_list: a list of quantizable modules that will have observers added to them
            if they are leaf nodes
        device: parent device, if any
        non_leaf_module_list: list of non-leaf modules we want to add observer

    Return:
        None, module is modified inplace with added observer modules and forward_hooks
    Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 t    ||                                  n	 |            }||                    |           |S N)
activationto)r,   devicespecial_act_post_processrX   s       r)   get_activation_post_processz3_add_observer_.<locals>.get_activation_post_process   sO     (/    ))++ 	
 MM&!!!r(   c                 4    t          | d          o| j        d uS )Nr,   rO   r,   )ms    r)   needs_observationz)_add_observer_.<locals>.needs_observation   s    q)$$>$)>>r(   c                      |           r`t          | t                    sM|                     d | j        |                     t	          | t          | j                             dS dS dS )zmAdds an activation post process module and register
        a pre or post hook that calls the module
        rF   rR   N)
isinstancer   
add_moduler,   rT   r   )r_   r[   rZ   r\   r`   s     r)   insert_activation_post_processz6_add_observer_.<locals>.insert_activation_post_process   s    
 Q 	
1k(B(B 	LL)++Iv'?    35ai@@     	 	 	 	r(   rF   zfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrW   )r   _get_unique_devices_lennextiterr6   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrO   r,   rF   rc   r   r   r   
from_floatsetattrr   _add_observer_r   r2   
Sequential)r9   qconfig_propagation_listnon_leaf_module_listrZ   custom_module_class_mappingdevicesre   r@   rA   r[   observed_childr\   r`   s      `       @@r)   rs   rs      s   ,  '#G#I#I "*&(# ~&v..LLAi`gii (+Gq(8(8d7mm$$$d   ? ? ?       & ,,.. 1 1e'..2:,>>(//#2Es1W
 
 -	 ! '' 4  z zy'CE'J'Jyyyz z z 1L0KM61 1- |,, #	  '' 6..u555 ,,U337KKK  '' 6..u555*511 	'DU'K'K$**52JKKKKe$$	,U337RRR8,U33j  FD.111 ,,H,O,OP&(() ) /.~>>>($+    	2&99/658#677/ )004LLL&&v... 	+,,/658#677/ )004LLL&&v...../ / / / MLr(   c                 |    d |                                  D             d |                                 D             z  S )Nc                     h | ]	}|j         
S r'   rZ   .0ps     r)   	<setcomp>z'_get_unique_devices_.<locals>.<setcomp>3  s    222AH222r(   c                     h | ]	}|j         
S r'   r|   r}   s     r)   r   z'_get_unique_devices_.<locals>.<setcomp>3  s'     6 6 66 6 6r(   )
parametersbuffers)r9   s    r)   rg   rg   2  sO    22f//11222 6 6 ..**6 6 6  r(   c                     t          |           r&t          | d          r| j        rt          |           S |                                 D ]\  }}t          |          | j        |<   | S )a{  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
    Note that this function will modify the children of module inplace and it
    can return a new module which wraps the input module as well.

    Args:
        module: input module with qconfig attributes for all the leaf modules
        that we want to quantize

    Return:
        Either the inplace modified module with submodules wrapped in
        `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
        wraps the input module, the latter case only happens when the input
        module is a leaf module and we want to quantize it.
    r,   )r   rO   r,   r   r6   r   _modules)r9   r@   rA   s      r)   r   r   8  s}      	2&99$FI&&$ N$
 F###,,.. 9 9e 1% 8 8Mr(   c                    t           j                            d           |t                      }|                    di           }|st          j        |           } |}|t                      }t          | d           t          d | 
                                D                       st          j        d           t          | |||           | S )a  Prepares a copy of the model for quantization calibration or quantization-aware training.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    The model will be attached with observer or fake quant modules, and qconfig
    will be propagated.

    Args:
        `model`: input model to be modified in-place
        `inplace`: carry out model transformations in-place, the original module is mutated
        `allow_list`: list of quantizable modules
        `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
        `prepare_custom_config_dict`: customization configuration dictionary for prepare function

    .. code-block:: python

       # Example of prepare_custom_config_dict:
       prepare_custom_config_dict = {
           # user will manually define the corresponding observed
           # module class which has a from_float class method that converts
           # float custom module to observed custom module
           "float_to_observed_custom_module_class": {
               CustomModule: ObservedCustomModule
           }
        }

    z!quantization_api.quantize.prepareNr#   r:   c              3   D   K   | ]}t          |d           o|j        V  dS )r,   Nr^   )r~   r_   s     r)   	<genexpr>zprepare.<locals>.<genexpr>  s3      LLqwq)$$2LLLLLLr(   zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)rw   )r2   _C_log_api_usage_oncer   r0   copydeepcopyr   r   anymoduleswarningswarnrs   )modelinplace
allow_listobserver_non_leaf_module_listr=   rw   ru   s          r)   r   r   S  s    F 
H  !DEEE!)%C%E%E""<"@"@/# #  %e$$  *#G#I#I u40000 LLEMMOOLLLLL 
K	
 	
 	
  %$?	    Lr(   c                      t           d          r$t           j                  rt           d           d fd	} |d            |d           d S )NrF   Fc                     | rj         nj        }| rt          nt          }t	                      }|                                D ]\  }}||u r|                    |           |D ]}|                    |           d S rW   )_forward_pre_hooks_forward_hooksrL   rJ   setitemsaddpop)rR   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr9   s         r)   remove_hooksz5_remove_activation_post_process.<locals>.remove_hooks  s    08S6,,f>S*2N&&8N 	  #uu"*.."2"2 	4 	4Iw-''$((333- 	$ 	$ILL####	$ 	$r(   Trb   F)rO   r   rF   delattr)r9   r   s   ` r)   _remove_activation_post_processr     s     v011 36Q&7 7 3 	1222
$ 
$ 
$ 
$ 
$ 
$ L$L%      r(   c                     |                                  D ]}t          |           t          | d          r| `t	          |            dS )zClean up the qconfig left in the module so that new qconfig can be
    propagated.

    Args:
        module: module to be cleaned up
    r,   N)children_remove_qconfigrO   r,   r   )r9   rA   s     r)   r   r     sY     ""  vy!! N#F+++++r(   c                    t           j                            d           |t                      }|st	          j        |           } |                                  t          | d            || g|R   t          | |d           | S )a  Quantize the input float model with post training static quantization.

    First it will prepare the model for calibration, then it calls
    `run_fn` which will run the calibration step, after that we will
    convert the model to a quantized model.

    Args:
        model: input float model
        run_fn: a calibration function for calibrating the prepared model
        run_args: positional arguments for `run_fn`
        inplace: carry out model transformations in-place, the original module is mutated
        mapping: correspondence between original module types and quantized counterparts

    Return:
        Quantized model.
    z"quantization_api.quantize.quantizeNTr   )	r2   r   r   r   r   r   evalr   r!   )r   run_fnrun_argsmappingr   s        r)   r   r     s    " 
H  !EFFF:<< %e$$	JJLLLE4    
F58E7D))))Lr(   c                 4   t           j                            d           |`|t           j        k    rjt          j        t          t          j        t          t          j        t          t          j	        t          t          j
        t          t          j        t          i}n|t           j        k    rjt          j        t          t          j        t          t          j        t          t          j	        t          t          j
        t          t          j        t          i}n%|t           j        k    r%t          j        t           t          j        t           i}n|t           j        k    rt          j        t&          i}nt)          d| d          t+          |t,                    r|t           j        u rt          }n_|t           j        u rt          }nI|t           j        u rt           }n3|t           j        u rt&          }nt/          dt1          |                    t3          t5          |t7          j        |                              }|t;                      }|st=          j        |           } |                                   tC          | |           tE          | |d           | S )av  Converts a float model to dynamic (i.e. weights-only) quantized model.

    Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

    For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
    by default is performed for layers with large weights size - i.e. Linear and RNN variants.

    Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
    If `qconfig` is provided, the `dtype` argument is ignored.

    Args:
        model: input model
        qconfig_spec: Either:

            - A dictionary that maps from name or type of submodule to quantization
              configuration, qconfig applies to all submodules of a given
              module unless qconfig for the submodules are specified (when the
              submodule already has qconfig attribute). Entries in the dictionary
              need to be QConfig instances.

            - A set of types and/or submodule names to apply dynamic quantization to,
              in which case the `dtype` argument is used to specify the bit-width

        inplace: carry out model transformations in-place, the original module is mutated
        mapping: maps type of a submodule to a type of corresponding dynamically quantized version
            with which the submodule needs to be replaced

    z*quantization_api.quantize.quantize_dynamicNz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r2   r   r   qint8rk   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr	   	Embeddingquint4x2r
   
ValueErrorrc   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r!   )r   qconfig_specdtyper   r   default_qconfigs         r)   r   r     s   > 
H  !MNNNEK	20/4
3
3LL em##	20/4
3
3LL el""!B?LL en$$!GLL llll   
L#	&	& REK5OOem##5OOel""?OOen$$DOO@#e**   Ci.>.O.OPPQQ;== %e$$	JJLLLul+++E7D))))Lr(   c                 \   t           j                            d           | j        s
J d            |t	                      }|st          j        |           } t          | d           t          | |dd           t          | t          |                                          d           | S )	a  
    Prepares a copy of the model for quantization calibration or
    quantization-aware training and converts it to quantized version.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    Args:
        model: input model to be modified in-place
        mapping: dictionary that maps float modules to quantized modules to be
                 replaced.
        inplace: carry out model transformations in-place, the original module
                 is mutated
    z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r2   r   r   trainingr   r   r   r   r!   r   r   values)r   r   r   s      r)   r   r   8  s     
H  !HIII>NNNNNN133 %e$$u40000E7DGGGGEW^^5E5E1F1FPTUUUULr(   c                     t           j                            d           |st          j        |           } |                                  t          | d            || g|R   t          | d           | S )ag  Do quantization aware training and output a quantized model

    Args:
        model: input model
        run_fn: a function for evaluating the prepared model, can be a
                function that simply runs the prepared model or a training
                loop
        run_args: positional arguments for `run_fn`

    Return:
        Quantized model.
    z&quantization_api.quantize.quantize_qatTr   )r2   r   r   r   r   trainr   r!   )r   r   r   r   s       r)   r    r    U  s     
H  !IJJJ %e$$	KKMMMt$$$$
F58E4    Lr(   Tc                     t           j                            d           |st          j        |           } t          | |d|||           |rt          |            | S )a  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class. And remove qconfig at the
    end if remove_qconfig is set to True.

    Args:
        `module`: prepared and calibrated module
        `mapping`: a dictionary that maps from source module type to target
                   module type, can be overwritten to allow swapping user defined
                   Modules
        `inplace`: carry out model transformations in-place, the original module
                   is mutated
        `convert_custom_config_dict`: custom configuration dictionary for convert function
        `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant

    .. code-block:: python

       # Example of convert_custom_config_dict:
       convert_custom_config_dict = {
           # user will manually define the corresponding quantized
           # module class which has a from_observed class method that converts
           # observed custom module to quantized custom module
           "observed_to_quantized_custom_module_class": {
               ObservedCustomModule: QuantizedCustomModule
           }
       }

    z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r2   r   r   r   r   _convertr   )r9   r   r   r   r   r   r   s          r)   r!   r!   l  sy    H 
H  !DEEE 'v&&!#=#=      Mr(   c           	         ||rt                      nt                      }|t                      }|                    di           }|st	          j        |           } i }|                                 D ]U\  }}	t          |	t                    s&t          |	          |vrt          |	|d|||           t          |	|||          ||<   V|                                D ]\  }
}|| j        |
<   | S )ao  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class

    Args:
        module: input module
        mapping: a dictionary that maps from source module type to target
                 module type, can be overwritten to allow swapping user defined
                 Modules
        inplace: carry out model transformations in-place, the original module
                 is mutated
        is_reference: a flag to enable quantized reference module
        use_precomputed_fake_quant: a flag to enable use of precomputed fake quant

    Nr$   Tr   )r   r   r   r0   r   r   r6   rc   r   r   r   r"   r   r   )r9   r   r   r   r   r   rw   reassignr@   modkeyvalues               r)   r   r     s?   ,  <>@@@9;; 	
 ")%C%E%E""<"@"@3R# #  'v&&H**,, 
 
	c 3--	,S119TTT*+E    %57Q
 
 nn&& % %
U$Mr(   c                 L   | }t          | d          r| j        d}t          |           |v r+|t          |                                        |           }d}nt          |           |v r|t          |                    }t          |d          r_|j        rX| j        J | j                                        } || j                   t          |          }|                    | |          }nOt          j	        |j                  }	d|	j
        v r|                    | |          }n|                    |           }d}|r| j                                        D ]}
|                    |
           | j                                        D ] }|t          ur|                    |           !t#          |           }t%          |          dk    sJ d	|             t%          |          d
k    rt'          t)          |                    nd}|r|                    |           |S )a	  Swaps the module if it has a quantized counterpart and it has an
    `observer` attached.

    Args:
        mod: input module
        mapping: a dictionary that maps from nn module to nnq module

    Return:
        The corresponding quantized module of `mod`
    r,   NFT_IS_REFERENCEr   r   r   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rO   r,   r   from_observedr   weightr   rq   inspect	signaturer   r   r   rP   r   rJ   rQ   rg   rh   ri   rj   rY   )r   r   rw   r   new_modswappedqmodweight_post_processweight_qparamssigpre_hook_fnr   rx   rZ   s                 r)   r"   r"     sY    GsI *#3;#:',,0KKK1,S11mC    GG)#..'997<<=Dt_-- 3$2D 3{...&)k&8&8&:&:###CJ///!01D!E!E//#~>>'88/3>AA"oo8R .  GG #ooc22GG 	#"5<<>> ? ?11+>>>> -4466 ; ;"88811'::: +3//GG!!!jahjj "!!,/LL1,<,<T$w--((($F #

6"""Nr(   c                     d }t          | d          r| j        | ||          dz   <   |                                 D ](\  }}|r ||          |z   n|}t          |||           )dS )a,  Traverse the modules and save all observers into dict.
    This is mainly used for quantization accuracy debug
    Args:
        mod: the top module we want to save all observers
        prefix: the prefix for the current module
        target_dict: the dictionary used to save all the observers
    c                     | dk    r| n| dz   S )Nr*   r-   r'   )r<   s    r)   
get_prefixz&_get_observer_dict.<locals>.get_prefix#  s    2vv6C<7r(   rF   N)rO   rF   r6   _get_observer_dict)r   target_dictr<   r   r@   rA   rB   s          r)   r   r     s    8 8 8 s-.. ( ' 	Jv!::	
 ))++ > >e5;E

6**T115+}====> >r(   )Nr*   N)NNr   )NNNN)FNNN)NF)NFTFNF)NFFNF)r*   )Br   r   r   r   r2   torch.ao.nn.quantizedr3   rk   	quantizedrn   torch.nntorch.ao.nn.intrinsicr   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   r   r   r	   r
   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r   r   r   torch.ao.quantization.stubsr   r   torch.nn.utils.parametrizer   utilsr   r   __all__is_activation_post_processr   quantizableMultiheadAttentionr&   r   r8   r   rJ   rL   rT   rs   rg   r   r   r   r   r   r   r   r   r    r!   r   r"   r   r'   r(   r)   <module>r      s           # # # # # # # # # # # #       . . . . . . F F F F F F               	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 B A A A A A A A C C C C C C M M M M M M M M   9 
 	$
r~@.
 	R\.
)2<+J2	 	 ' ' ' #. . . .b   20 0 0
2 2 2
	T 	T 	T 	T " $H/ H/ H/ H/V    : "&#A A A AH! ! !4, , ,    < EKuW W W Wt   :   2 #$1 1 1 1l #$; ; ; ;~ KP9 9 9 9x> > > > > >r(   