
    קg$                        d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZ d dlZd dlmZmZ d dlmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	l m!Z! d d
l"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3 d dl4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z: ddl;m<Z<m=Z= ddl>m?Z?m@Z@ ddlAmAZA ddlBmCZCmDZDmEZEmFZFmGZG ddlHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ g dZRejS        ejT        ejU        ejV        ejW        ejX        ejY        ejZ        ej[        g	Z\ej]        ej^        j_        j`        ja        ejb        ej^        j_        jc        ja        iZddeje        j/        de3deefejg        jh        f         deefe
efeif         f         deefef         ddfdZjdeje        j/        de3deefejg        jh        f         deefe
efeif         f         deefef         ddfd Zkde3d!e2ddfd"Zld#ejg        jh        demfd$Znde1deefef         demfd%Zod&e/d'eddfd(Zpd)ede3d!e2ddfd*Zqd+e3deefe
efeif         f         deefef         de
efeff         fd,Zrde3d!e2ddfd-Zsde3deefejg        jh        f         deejg        jh                 fd.Ztde3deefejg        jh        f         deje        j/        d/emd'ee         ddfd0Zu	 	 d@de3deefejg        jh        f         d2e	ef         deefef         d'ed3emd/emddfd4Zvde3d5e3d!e2ddfd6Zwde3d!e2deefejg        jh        f         d7ee!eeef         f         d8e	e3         ddfd9Zx	 	 	 	 	 	 	 dAde/d/emd;ee<eefef         df         d<emd=emd>eeeefef         df         d'eeeefef         df         d3emde/fd?ZydS )B    N)	AnyCallableDictListOptionalSetTupleTypeUnion)
CUSTOM_KEYNUMERIC_DEBUG_HANDLE_KEY)BackendConfigget_native_backend_config)get_fused_module_classesget_pattern_to_dtype_configsget_qat_module_classes-get_root_module_to_quantized_reference_module)_is_activation_post_process)qconfig_equals
QConfigAny)QConfigMapping)	QuantType)_remove_qconfig)DeQuantStub)_parent_name"activation_is_statically_quantizedget_qparam_dictget_swapped_custom_module_classis_per_channelto_underlying_dtypeweight_is_quantized)GraphModule)ArgumentGraphNode)type_before_parametrizations   )quantized_decomposed_lib)convert_eq_obsupdate_obs_for_equalization)ConvertCustomConfigPrepareCustomConfig)_is_observed_module_is_observed_standalone_module)lower_to_fbgemm))_compare_prepare_convert_qconfig_mappings_generate_node_name_to_qconfig&_is_qconfig_supported_by_dtype_configs_update_qconfig_for_fusion_update_qconfig_for_qat)	_get_module_is_custom_module_lstm_is_custom_module_mhaassert_and_get_unique_devicecollect_producer_nodescreate_getattr_from_valueget_custom_module_class_keys graph_module_from_producer_nodesnode_arg_is_weight)convertconvert_custom_moduleconvert_standalone_moduleconvert_weighted_modulemodelnodemodulesnode_name_to_scopenode_name_to_qconfigreturnc           	      $  ' | j         }|J t          j        t                    sJ t	          |          \  }}|j                 }t          |d          r|                    |            dS t          fdt          j	                  t          j
                                                  z   D                       }	|	st          |          sd|                              5                      j	        d                    |                               ddd           n# 1 swxY w Y   dS |j        }
d}t          |d          r|j        }d }|
t$          v r|sd}d}|                                \  }}t)          |j                  rqt-          |j                  }t0          j        j        j        j        }t0          j        j        j        j        }|j        }|j        }tA          |
          }||||||d	}nyt0          j        j        j!        j        }t0          j        j        j"        j        }tG          |          }t-          |          }|j        }|j        }tA          |
          }|||||d
}|                              5  j	        d         }|g}|$                                D ]h\  }}|dv rJt          |tF          t,          f          s.tK          | |||z   |z   |          }|&                    |           S|&                    |           i|'                    ||tQ          |          i           }|g|dd         z   }|)                    |tQ          |           |||                    '                    '           tT          j+        v rktX          j+        tT                   v rRtT          'j+        vri 'j+        tT          <   j+        tT                   tX                   'j+        tT                   tX          <   |                               ddd           dS # 1 swxY w Y   dS |r'd}t0          j        j        j!        j-        }tA          |
          }|t0          j.        t0          j/        fv s
J d            |j        }|j        }ta          |dt0          j1                  }ta          |dt1          j2        t0          j3                  j4                  }||||d}tj          |         } |                              5  j	        d         }j	        d         g}!|$                                D ]\  }}"|!&                    |"           |'                    d| tQ          |!          i           }#|'                    dtl          j7        |#dfi           }$|'                    dtl          j7        |#dfi           }%|d         }|d         }|d         }
|$|%|||
d
}ddd           n# 1 swxY w Y   |                              5  j	        d         }|g}|$                                D ]6\  }}|dv r|}|&                    |           !|&                    |           7|'                    ||tQ          |          i           }|g|dd         z   }t0          j        j        j"        j-        }|)                    |tQ          |           |||                    ''fd}&                    '           tX          j+        v rj+        tX                   'j+        tX          <   |                               ddd           dS # 1 swxY w Y   dS |
t0          j8        k    rts          d          dS )au  Replace activation_post_process module call node with quantize and
    dequantize node working with decomposed Tensor

    Before:
    ... -> observer_0(x) -> ...
    After:
    ... -> torch.ops.quantized_decomposed.quantize_per_tensor(x, ...) ->
    torch.ops.quantized_decomposed.dequantize_per_tensor() -> ...

    or quantize_per_channel and dequantize_per_channel
    Nr>   c              3   8   K   | ]}t          |          V  d S N_has_none_qconfig.0nrF   s     \/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/ao/quantization/fx/convert.py	<genexpr>zM_replace_observer_with_quantize_dequantize_node_decomposed.<locals>.<genexpr>x   B         	!122         r   F
is_dynamicc                 h    i }d|j         v r&|j         d         j        }|t          j        k    rd|i}|S )Nval	out_dtype)metadtypetorchfloat32)dequantize_op
input_nodedequantize_op_kwargsdq_out_dtypes       rP   add_dequantize_op_kwargsz\_replace_observer_with_quantize_dequantize_node_decomposed.<locals>.add_dequantize_op_kwargs   sB    !JO##%?517Lu},,(3\'B$##rS   call_function)_scale__zero_point__axis__quant_min__quant_max__dtype_)rb   rc   re   rf   rg   rb   rc   r'   zVonly uint8 and int8 are supported in reference flow for dynamic quantization right nowqschemeeps)re   rf   _eps_rg   re   rf   rg   c                     | u rn| S rJ    )xdequantized_noderC   s    rP   remap_fnzL_replace_observer_with_quantize_dequantize_node_decomposed.<locals>.remap_fnM  s    +,99''!;rS   z,decomposed to float16 op not implemented yet):graph
isinstancetargetstr_get_module_path_and_prefixhasattrr>   alllistargsuserskeys_is_conversion_supportedinserting_beforereplace_all_uses_with
erase_noderY   rT   SUPPORTED_QDTYPEScalculate_qparamsr   ri   intch_axisrZ   opsquantized_decomposedquantize_per_channeldefaultdequantize_per_channel	quant_min	quant_maxr    quantize_per_tensordequantize_per_tensorfloatitemsr:   appendcreate_nodetuplera   r   rX   r   tensoruint8int8getattrper_tensor_affinefinfor[   rj   _QSCHEME_TO_CHOOSE_QPARAMS_OPoperatorgetitemfloat16NotImplementedError)(rB   rC   rD   rE   rF   rq   module_pathprefixactivation_post_processskip_replacementrY   rT   r`   	node_typequantize_opscale
zero_pointr   r\   r   r   dtype_qparamsr]   quantize_op_inputskeyvalue_or_nodeqparam_nodequantized_node	dq_inputsri   rj   choose_qparams_opchoose_qparams_op_inputsvaluechoose_qparams_node
scale_nodezero_point_noderp   ro   s(    `  `                                  @rP   :_replace_observer_with_quantize_dequantize_node_decomposedr   Z   st	   $ KEdk3'''''5 "6 K &dk2&	22 ''t444     di4
(9(9#:#::      78OPP  ##D)) 	# 	#&&ty|444T"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	 $)EJ&55 8,7
$ $ $ !!!:! $	*.3EEGGz19:: 	19::G)8MUK	.EM  09I/9I(//F  *!((! GG  )8LTK!I:PXM%LLEZJ/9I/9I(//F  *((! G ##D)) /	# /	#1J",&-mmoo = ="] 555"=5#,?? 6 #<ukF&:S&@-# #K '--k:::: '--m<<<<"..;.@(A(A2 N ((+=abb+AAI$22i  ((
CC    &&'7888 di'',	*0EEE%5%:::8:$)*5NRiO*O, %j12JK T"""_/	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	# /	#` 
 fR $	i4HO
 %U++%+uz2222- 322 ,5	+5	19e>UVV-uek%-6P6P6TUU
 %$	
 
 :'B##D)) 	 	1J(,	!~$%mmoo 7 7
U )//6666"'"3"3!2E:R4S4SUW# # **!14G3KR J $//!14G3KR O  .I.II&E% /((  G)	 	 	 	 	 	 	 	 	 	 	 	 	 	 	: ##D)) (	# (	#1J",&-mmoo = ="] 555 #0K&--k:::: '--m<<<<"..;.@(A(A2 N ((+=abb+AAI "I:PWM$22i  ((
CC   < < < < < < &&'7888'4944BF),C %&>? T"""Q(	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	# (	#R 
%-		!"PQQQ 
 	sK   .6D00D47D4FP::P>P>CXX	X"D5]$$](+](c                 	   |J t          |j        t                    sJ | j        }t	          ||          \  }}||j                 }t          fdt          |j                  t          |j        	                                          z   D                       }	|	st          |          sd|                    |          5  |                    |j        d                    |                    |           ddd           n# 1 swxY w Y   dS |j        }
d}t          |d          r|j        }|
t"          j        t"          j        t"          j        t"          j        t"          j        fv r|sd}d}|                                \  }}t1          |j                  r(t5          |j                  }||||
d}t"          j        }n0t;          |          }t5          |          }|||
d}t"          j        }|                    |          5  |j        d         }|g}|                                D ]L\  }}|d	v r.tA          | |||z   |z   |          }|!                    |           7|!                    |           M|"                    ||tG          |          i           }|$                    d
|f          }|                    |           |                    |           ddd           dS # 1 swxY w Y   dS |rd}t"          j%        }t"          j&        j'        j(        dv }|
|d}|                    |          5  |j        d         }|g}|                                D ]\  }}|!                    |           |"                    ||tG          |          i           }|$                    d
|f          }|                    |           |                    |           ddd           dS # 1 swxY w Y   dS |
t"          j)        k    rd}d}d|
i}|                    |          5  |j        d         }|g}|                                D ]\  }}|!                    |           |"                    ||tG          |          i           }|$                    d
|f          }|                    |           |                    |           ddd           dS # 1 swxY w Y   dS dS )zReplace activation_post_process module call node with quantize and
    dequantize node

    Before:
    ... -> observer_0(x) -> ...
    After:
    ... -> torch.quantize_per_tensor(x, ...) -> x.dequantize() -> ...
    Nc              3   8   K   | ]}t          |          V  d S rJ   rK   rM   s     rP   rQ   zB_replace_observer_with_quantize_dequantize_node.<locals>.<genexpr>v  rR   rS   r   FrT   ra   )rb   rc   rd   rg   )rb   rc   rg   rh   
dequantize)ry   )fbgemmx86)rg   _reduce_range_call_methodtorg   )*rr   rs   rt   rq   ru   rw   rx   ry   rz   r{   r|   r}   r~   r   rY   rv   rT   rZ   quint8qint8qint32float8_e5m2float8_e4m3fnr   r   ri   r   r   r   r   r   r   r:   r   r   r   r   quantize_per_tensor_dynamicbackends	quantizedenginer   )rB   rC   rD   rE   rF   rq   r   r   r   r   rY   rT   r   r   r   r   r   r   r]   r   r   r   r   r   ro   reduce_ranger   s       `                      rP   /_replace_observer_with_quantize_dequantize_noder   ^  s    dk3'''''KE5 "6 K &dk2     di4
(9(9#:#::      78OPP  ##D)) 	# 	#&&ty|444T"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	 $)EJ&55 8,7
    $	*.3EEGGz19:: 	419::G  *! 	 G  4KK%LLEZJ"'PUVVG3K ##D)) 	# 	#1J",&-mmoo = ="] 555 #<ukF&:S&@-# #K '--k:::: '--m<<<<"..;.@(A(A2 N  %00^DU0VV&&'7888T"""-	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#. 
 '# $	7 ~/6:KK#|DD##D)) 	# 	#1J",%mmoo 1 1
U"))%0000"..;.@(A(A2 N  %00^DU0VV&&'7888T"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 
%-		!	e$##D)) 	# 	#1J",%mmoo 1 1
U #))%0000"..;.@(A(A2 N  %00^DU0VV&&'7888T"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#	 
 	sK   6DD
D
CK44K8;K8B'O88O<?O<2B'S&&S*-S*rq   c                     | j         d         }t          |t                    sJ d|             |                     |           |                    |            t          ||           d S )Nr   z@Expecting the for call custom module node to be a Node, but got )ry   rr   r%   r~   r   _insert_dequantize_node)rC   rq   call_custom_module_nodes      rP   6_replace_observer_or_dequant_stub_with_dequantize_noder     s     #il  d dcJaccd d d 	6777	T3U;;;;;rS   r   c                 ~    | j         }d}t          | d          r| j        }|t          v r| p|p|t          j        k    S )NFrT   )rY   rv   rT   r   rZ   r   )r   rY   rT   s      rP   r|   r|     sX    #)EJ&55 8,7
 
#	#	8Z 	"	"EM!rS   c                 \    t          | t                    o| j        |v o|| j                 du S )z\Check if a node has a qconfig of None, i.e. user requested to not quantize
    the node
    N)rr   r%   name)rC   rF   s     rP   rL   rL     s:     	4 	4I--	4 +t3rS   observedbackend_configc                     | j         j        D ]V}|j        dk    r|j        D ]@}|r<t	          ||          r,t          |          }|&t          | |          } |             AWdS )zExtract the subgraph that produces the weight for dynamic quant
    or weight only quant node and run the subgraph to observe the weight.
    Note that the observers of dynamic quant or weight only quant ops are
    run during the convert step.
    ra   N)rq   nodesopry   r=   r9   r<   )r   r   rC   node_argweight_observer_nodesweight_observer_modules         rP   _run_weight_observersr     s     $ ) )7o%%	 
	) 
	)H ).tX>> )(>x(H(H%(0)I3* *& '&(((
	)) )rS   argc                    t          | t                    r;| j        dk    r0| j        dk    r%| j        d         }|                    | |           dS t          | t          t          f          r| D ]}t          |||           dS t          | t                    r*| 
                                D ]}t          |||           dS t          j        dt          |                       dS )zIf the arg is a dequantize Node, or a list/tuple/dict of dequantize Node,
    we'll recursively remove the dequantize Node
    r   r   r   z6Unsupported node type in recursive remove dequantize: N)rr   r%   r   rs   ry   replace_input_withrx   r   "_maybe_recursive_remove_dequantizedictvalueswarningswarntype)r   rC   rq   quantize_nodearg_elements        rP   r   r   0  s    #t 
=!8!8SZ<=W=W 	]33333	C$	'	' 	
 	I 	IK.{D%HHHH	I 	I	C		 
::<< 	I 	IK.{D%HHHH	I 	I 	PT#YYPP	
 	
 	
 	
 	
rS   obs_nodec                    | j         d         }t          |t                    sJ d|             |j        |v r||j                 du nd}|r\t	          | j                  }|r|d         nd}d}|D ]0}|j        dk    r#|j        t          j	        j
        j        k    r|} n1|r|}d}	n|}d}	|r|j        |v r||j                 \  }
}nd}
|
|	fS )ai  Given and observer node, get the `Scope` or the fully qualified name for
    the submodule containing the observed node, also return a prefix of "_input"
    when the observed node is an input of a F.linear op, and not the output of another
    quantized op.
    TODO: this logic is hacky, we should think about how to remove it or make it more
    general
    r   z.Expecting observed node to be a Node, but got Nra   _input )ry   rr   r%   r   rx   rz   r   rs   rZ   nn
functionallinear)r   rE   rF   observed_nodeis_input_observer_onlyrz   first_linear_use_or_first_uselinear_noderO   r   r   _s               rP   ru   ru   E  sJ    M!$M
 t  H HGGGH H H
 !555 	]/0D88 
   X^$$49(Cat% 	 	At&&18ux7J7Q+Q+Q 	8,7) )6% 	&	).2DDD+,I,NOQQ
 rS   c                     |                     |           5  |                    d| f          }t          | j                  D ]}||ur|                    | |           	 ddd           dS # 1 swxY w Y   dS )z-Inserts dequantize node for `node` in `graph`r   N)inserting_afterr   r   rz   r   )rC   rq   dequantize_node	user_nodes       rP   r   r   ~  s    			t	$	$ D D++L4'BBdj)) 	D 	DI//,,T?CCC	DD D D D D D D D D D D D D D D D D Ds   A	A--A14A1c                     | j                                         D ]:}|j        dk    r-|t          |j                           }t          |          r|c S ;dS )z\
    If the node is observed, return the observer
    instance. Otherwise, return None.
    call_moduleN)rz   r{   r   rt   rs   r   )rC   rD   maybe_obs_node	maybe_obss       rP   _maybe_get_observer_for_noder     sd     *//++ ! !--N$9 : :;I*955 !    4rS   is_referencec                 j   |rt           j        j        j        j        }nt           j        j        j        j        }|t          | j                           }|j        d         j	        }t          | j                  }t          t          |                    D ]y}	|	|v rs||	         }
|
j        dk    r`|
j        dk    rU|
j        d         }|                     |
|           t          |
j                  dk    r|j                            |
           z|j        d         j        }t          |          dk    r,|d         dk    s
J d            	 t)          | |j                    |||          }t+          | j                  \  }}t-          ||         ||           ||t          | j                  <   dS )a  Converts a observed standalone module to a quantized standalone module by calling
    the fx convert api, currently using the same `is_reference` flag as parent, but we may
    changing this behavior in the future (e.g. separating quantization and lowering for
    standalone module as well)

    Args:
      - node: The call_module node of the observed standalone module
      - modules: named_module of original model
      - model: original model
      - is_reference: a flag from parent provided by user to decide if we want to
        produce a reference model or a fbgemm/qnnpack model
      - backend_config: backend configuration of the target backend of quantization
    _observed_graph_module_attrsr   r   r   zCurrently only quantized)r   N)rZ   aoquantizationquantize_fxconvert_to_reference_fx
convert_fxrt   rs   rX   &standalone_module_input_quantized_idxsrx   ry   rangelenr   r   rz   rq   r   'standalone_module_output_quantized_idxsr   r   setattr)rC   rD   rB   r   r   
convert_fnobserved_standalone_modulesm_input_quantized_idxsry   idxr   r   sm_output_quantized_idxsquantized_standalone_moduleparent_namer   s                   rP   r@   r@     s   *  BX*6N

X*6A
 /6c$+6F6F.G8=&,  	??DSYY 0 0)))s)Cv&&3:+E+E #''];;;sy>>Q&&K**3///9>& -  #$$q(('*a///1K///( 	 ek222 #-*"># # # %T[11KGK $(CDDD ;GCrS   Fobserved_node_namesis_decomposedc                 T   |t          | j                           }|j        }d}	t          |          }
t	          ||
          rI|j        }	|                                }t          | j                  \  }}t          ||         ||           | j	        |v }|t          | |          s|sdS t          |          }|                    t          |          g           }t          ||          sdS t          |          }|sdS d}|}t	          |t           j        j        j        j                  r
|}|d         }d|i}t	          |t           j        j                  r|                                }|                                } ||j                    ||j                   t3          |          }t3          |          }|                    ||d           n1t	          |t           j        j        t           j        j        f          r|j        D ]}}t=          ||          rk|                    d          rVtA          ||          }|                                }	|	j!        t           j"        k    r |	|           t3          |	          ||<   ~n{|	du }|r:|                                }	tG          |          }|r|	$                    |           | }|r|r|s |	|j                   |                    t3          |	                     tK          |          }|                    tM          |          d          }|J dtM          |                       |'                    ||          }|||d<   dS t          | j                  \  }}t          ||         ||           dS )a  Convert a weighted module to reference quantized module in the model
    If the QConfig of a QAT module is not set, the module will still be converted to
    a float module.

    Args:
      - node: The call_module node of the observed standalone module
      - modules: named_module of original model
      - observed_node_names: names for the set of observed fx node, we can skip
        this conversion if the node is not observed
    Nr   r  )	weight_ih	weight_hhweightz3No reference quantized module class configured for )(rt   rs   qconfigr   rr   weight_fake_quantto_floatr   r  r   rL   r   getr   r2   r!   rZ   r   r   	intrinsic_FusedModuleRNNCellBaser  r  r  r   updateLSTMGRU_flat_weights_namesrv   
startswithr   rY   r   r8   r   r   r&   
from_float) rC   rD   r  rF   r   r  r   original_moduler  weight_post_processqat_module_classesr
  r   is_observedpattern_to_dtype_configsdtype_configsis_weight_quantizedfused_modulefloat_modulewq_or_wq_dictweight_post_process_ihweight_post_process_hhweight_qparams_ihweight_qparams_hhwnr  is_ptqdeviceis_qat)root_module_to_quantized_reference_moduleref_qmodule_clsref_qmodules                                    rP   rA   rA     s#   & c$+../O)1G/??/#566 = .?)2244(55T$dO<<<)22K 	T#788 	 	 	  <NKK,00o1F1FKKM1'=II  .g66  L"L/58;#8#EFF '##A %m4M, 455 7C!(!1!1!(!1!1|5666|5666+,BCC+,BCC.. 	
 	
 	
 	
 
L58=%(,"?	@	@ *C 2 	I 	IB|R(( IR]]8-D-D I r22&-nn&6&6#&,;;''///$34G$H$Hb!	I %, 	/").."2"21,??F /#&&v...(  	5, 	56 	5 3444_-@AABBB 	6nEE . @CC$\22D O 	##i=YZf=g=gii 	$##!,,\=IIK%Q(55T$dK88888rS   	prev_nodec                    t          |t                    sJ d|             |j        dk    r[|j        dk    rR|                     ||j        d                    t          |j                  dk    r|                    |           dS dS dS dS )z
    Given a custom module `node`, if the previous node is a dequantize, reroute the custom as follows:

    Before: quantize - dequantize - custom_module
    After: quantize - custom_module
                 \ - dequantize
    zDExpecting the argument for custom module node to be a Node, but got r   r   r   N)	rr   r%   r   rs   r   ry   r  rz   r   )rC   r3  rq   s      rP   ,_remove_previous_dequantize_in_custom_moduler5  f  s     4  Z ZYiYYZ Z Z |}$$)9\)I)I	9>!+<===y1$$Y'''''	 %$)I)I %$rS   custom_module_class_mapping(statically_quantized_custom_module_nodesc                 2   |t          | j                           }t          | |          }|j        }t	          |          r|                    |            t          | |          rt          | j                  dk    r>t          | j        d         t                    rt          | j        d                   dk    sJ | j        \  }\  }	}
t          |t                    sJ t          |	t                    sJ t          |
t                    sJ t          | ||           t          | |	|           t          | |
|           nt          | |          rt          | j                  dk    sJ | j        \  }}}t          |t                    sJ t          |t                    sJ t          |t                    sJ t          | ||           t          | ||           t          | ||           nP| j        d         }t          |t                    sJ t          | ||           t          | |          }|J ||_        t          |||          }|                    |          }t#          | j                  \  }}t%          ||         ||           dS )a  Converts an observed custom module to a quantized custom module based on
    `custom_module_class_mapping`
    For static quantization, we'll also remove the previous `dequantize` node and
    attach the observer node for output to the module, the observer for the node
    will be converted to a dequantize node instead of quantize-dequantize pairs
    later in the graph. In the end we would have a quantized custom module that
    has the same interface as a default quantized module in nn.quantized namespace,
    i.e. quantized input and quantized output.

    Args:
      - node: The call_module node of the observed standalone module
      - graph: The graph containing the node
      - modules: named_module of original model
      - custom_module_class_mapping: mapping from observed custom module class to
        quantized custom module class, used to swap custom modules
      - statically_quantized_custom_module_nodes: we'll add the custom module node
        if we find it is statically quantized, this will be used later when converting
        observers to quant/dequant node pairs, if the observed node is a statically
        quantized custom module nodes, we'll convert the observer to a dequantize node,
        this is to keep the interface the same as the default quantized module.
        TODO: maybe we want to redesign this part to align with reference model design
        as well, but there has been some discussions around the interface, so we can do
        it later.
       r'      r   N)rt   rs   r   r  r   addr6   r  ry   rr   r   r%   r5  r7   r   r   from_observedr   r  )rC   rq   rD   r6  r7  observed_custom_moduler   r  inputshidden0hidden1queryr   r   r   r   quantized_custom_module_classquantized_custom_moduler
  r   s                       rP   r?   r?   {  s   > %S%5%56,T7;;I$,G)'22 (U044T:::!$00 &	U DI!##ty|U33 $	!%%****+/9(V'gwfd+++++gt,,,,,gt,,,,,8vuMMM8wNNN8wNNNN"411 	U ty>>Q&&&& $	E3eT*****c4(((((eT*****8ueLLL8sEJJJ8ueLLLL )A,Cc4(((((8sEJJJ&B4&Q&Q#*666=T": %D ;W% %! <II  %T[11KGK $(?@@@@@rS   Tconvert_custom_configis_standalone_module_remove_qconfig_flagqconfig_mappingc           
         |t                      }t          |t                    r0t          j        dt
          d           t          j        |          }t          |t                    r4t          j        dt
          d           |rt          j        |          nd}t          j	        |          }|t          |t                    sJ t          |t                    r0t          j        dt
          d           t          j        |          }|t                      }t          |           s
J d            | j        d         }|j        }	|j        }
|j        }|j        }t          |                     d	
                    }|r|j        }t          j	        |          }|j        rt+          ||           t-          | |           t/          ||           t1          | || j        ||	          }|                                D ]N\  }}||v sJ d| d            ||         /t7          |||                   sJ d| d| d||                      O|}t9          |j                  }|j        }|j        !t?          | |          }tA          | ||           tC          | |           g }| j        j"        D ]'}|j#        dk    r|$                    |j%                   (d}|
j&        }|
j'        }tQ          |          }tS          |*                                          }tW          |          }tY          |          }t[                      }t]          | j        j"                  D ]x}|j#        dk    r!|} |dz  }| |v rt_          || j                   /|j#        dk    rta          |          dk    rN|}!|j1        d         }"t          |"t\          tR          f          r"|D ]}#te          |"|#         |!| j                   t          |"tf          t          f          rd|v rte          |"|!| j                   t          j        dti          |"                      |j#        dk    rutk          ||          }$|$J tm          |$          rT|j1        d         }%|%|v rto          || j                   N|rtq          | |||	|           ets          | |||	|           zt          |$tt                    rto          || j                   tw          |$          rty          ||| ||           t{          |$          t[          |          >                    |          >                    |          v rAt{          |$          |v rt{          |$d                   |vr8t          |||||||           Ot{          |$          |v rt          || j        |||           z| j        A                                 t          | | j                  } |st          | ||	          } |rt          |            | E                                 | j        F                    dd           | S )a  
    We will convert an observed model (a module with observer calls) to a reference
    quantized model, the rule is simple:
    1. for each observer module call in the graph, we'll convert it to calls to
       quantize and dequantize functions based on the observer instance
    2. for weighted operations like linear/conv, we need to convert them to reference
       quantized module, this requires us to know whether the dtype configured for the
       weight is supported in the backend, this is done in prepare step and the result
       is stored in observed_node_names, we can decide whether we need to swap the
       module based on this set

    Args:
       * `is_standalone_module`: when this flag is True, it means we are quantizing
       a submodule that is not inlined in parent module, and will be quantized
       separately as one unit.

       * `is_decomposed`: a boolean flag to indicate whether we want to use the
        quantize operator for decomposed quantized tensor
        (torch.ops.quantized_decomposed.quantize_per_tensor) or default/standalone
        quantized tensor (torch.quantize_per_tensor)

    Returns:
         a quantized standalone module, whether input/output is quantized is
         specified by prepare_custom_config, with
         input_quantized_idxs, output_quantized_idxs, please
         see docs for :func:`~torch.ao.quantization.prepare_fx` for details
    NzPassing a convert_custom_config_dict to convert is deprecated and will not be supported in a future version. Please pass in a ConvertCustomConfig instead.r9  )
stacklevelzPassing a QConfig dictionary to convert is deprecated and will not be supported in a future version. Please pass in a QConfigMapping instead.zPassing a backend_config_dict to prepare is deprecated and will not be supported in a future version. Please pass in a BackendConfig instead.z-incoming model must be produced by prepare_fxr   F)remove_duplicatezExpected key z  in convert node_name_to_qconfigzExpected k zD to have the same value in prepare and convert QConfigMappings, but z was updated to placeholderr   r'   outputz1Unsupported node type for output_quantized_idxs: r   )Gr+   rr   r   r   r   FutureWarning	from_dictr   copydeepcopyr   r   r-   rX   rE   prepare_custom_configr  rF   named_modulesrG  r/  r4   r3   r0   r1   rq   r   r   r;   observed_to_quantized_mapping!equalization_node_name_to_qconfigr*   r)   r   r   r   r   r   input_quantized_indexesoutput_quantized_indexesr   r   r{   r   r   setrx   r   r  ry   r   r%   r   r5   r   r   r   r   r   r.   r@   r&   unionrA   r?   eliminate_dead_coder"   r/   r   delete_all_unused_submodulespop)&rB   r   rD  rE  rF  rG  r   r  observed_graph_module_attrsrE   rQ  r  rF   rD   prepare_qconfig_mappingmodules_copyconvert_node_name_to_qconfigkvcustom_module_classesr6  weight_eq_obs_dictgraph_inputsrC   placeholder_node_seen_cntinput_quantized_idxsoutput_quantized_idxsr0  root_module_classesr   fused_module_classesr7  cur_placeholder_node_idxreturn_noderL  r  modr   s&                                         rP   r>   r>     s   J $ 3 5 5'.. UQ		
 	
 	
 	
 !4 =>S T T/4(( 	
L		
 	
 	
 	
 :IRN$_555d 	 mO44O"j.&Q&Q""".$'' AK		
 	
 	
 	
 '0@@244u%%VV'VVVV"'*-K"L 	$6  	$9  %@$S2M2b 5&&&>>??G  <2M2]}W--&- 	E#O^DDD"5/:::12I?[[['E<o?Q(
 (
$ )..00 	 	DAq1111BqBBB 211+A.:%a)Ea)HII  P! P PP P.J1.MP P    <8;  #8"U"DP 9HHug'9::: %000 L! + +7m##	*** !"&;&S'<'U 	6nEE .   I N N P PQQ/??3NCC:=%%,U[&'' a a7m##'@$%*%'+???
 (ek:::W  ())Q.. KYq\F&4-00 0  C6s[%+    FT4L11 	 ---6v{EKXXXVVVV    W%%dG,,C???*3// ; $	! $LLLJek    % R! #.0    H! #.0    C-- $F%+    044  )'5,   
 .c22c:M6N6N6T6T"7 7e())* * 1559MMM4SV<<DWWW''("!     .c226KKK%K/<   
K##%%%u{++E  Q';=OPP
  	&&(((	JNN14888LrS   )FF)FNFTNNF)zrO  r   r   typingr   r   r   r   r   r   r	   r
   r   rZ   torch.ao.quantizationr   r   $torch.ao.quantization.backend_configr   r   *torch.ao.quantization.backend_config.utilsr   r   r   r   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   %torch.ao.quantization.qconfig_mappingr    torch.ao.quantization.quant_typer   torch.ao.quantization.quantizer   torch.ao.quantization.stubsr   torch.ao.quantization.utilsr   r   r   r   r   r    r!   torch.fxr"   torch.fx.graphr#   r$   r%   torch.nn.utils.parametrizer&   _decomposedr(   	_equalizer)   r*   custom_configr+   r,   graph_moduler-   r.   r/   qconfig_mapping_utilsr0   r1   r2   r3   r4   utilsr5   r6   r7   r8   r9   r:   r;   r<   r=   __all__r   r   r   r   r   int16int32r   r   r   r   r   r   choose_qparamsr   per_tensor_symmetricchoose_qparams_symmetricr   fxrt   r   Moduler   r   r   r   boolr|   rL   r   r   ru   r   r   r@   rA   r5  r?   r>   rm   rS   rP   <module>r     s      O O O O O O O O O O O O O O O O O O O O O O  F F F F F F F F                   G F F F F F D D D D D D D D @ @ @ @ @ @ 6 6 6 6 6 6 : : : : : : 3 3 3 3 3 3                  !           0 0 0 0 0 0 0 0 0 0 C C C C C C 2 1 1 1 1 1 B B B B B B B B C C C C C C C C M M M M M M M M , , , , , ,             
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
   
L	K	L	K	J	K	K		
  
UY;JQ		 > W ^! ~R8~R
~R #ux&'~R S%T	"223	~R
 sJ/~R 
~R ~R ~R ~RHJ#8J#
J# #ux&'J# S%T	"223	J#
 sJ/J# 
J# J# J# J#j	<
	<	<		< 	< 	< 	<eho $    


*.sJ*?
	
 
 
 
)K ) )SW ) ) ) ),
C 
t 
E 
d 
 
 
 
*66S%T	"2236 sJ/6 38_	6 6 6 6rD$ Du D D D D D
c58?23eho   =<
=<#ux&'=< 8=< 	=<
 ]+=< 
=< =< =< =<L  M9 M9
M9#ux&'M9 SM9 sJ/	M9
 "M9 M9 M9 
M9 M9 M9 M9`(
(((-(	( ( ( (*TA
TATA #ux&'TA "&idDj1A&A!B	TA
 /2$iTA 
TA TA TA TAr NR!&!%CGAER RRR !!4d38nd!JKR 	R
 R >4S>4?@R -c3h=>R R R R R R R RrS   