
    קg5A                        d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlmc mZ d dlmZ d dlZd dlmZ d dlmc mZ d dlZd dl Z d dl!Z!d dl"m#Z# de$d	ee$e$f         fd
Z%dee         dej&        de	e$e
f         fdZ'dej&        de	e$e
f         dej        j(        fdZ)d*dej        j(        d	ej        j(        fdZ*dej(        d	ej(        fdZ+dej(        deej&                 deej&                 deej&                 fdZ,ej-        ej.        ej/        ej0        ej1        ej2        ej3        ej4        ej5        ej6        ej4        ej7        ej8        gZ9ej:        ej;        gZ<ej-        ej=        ej.        ej>        ej/        d iZ?deej&                 de	e$ej(        f         fdZ@deej&                 de	e$ej(        f         de	ej(        ej(        f         fdZA G d d          ZBd+d"ZCd#eBd	eDfd$ZE G d% d&          ZFdejG        fdej        j(        d'ee	e$e
f                  d(eejG                 d	ej        j(        fd)ZHdS ),    N)ArgumentTarget)fuse_conv_bn_eval)TypeDictAnyTupleIterableOptionalListcast)	ShapeProp)defaultdict)Enumtargetreturnc                 P    |                      dd          ^ }}|r|d         nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/fx/experimental/optimization.py_parent_namer      s3    
 MM#q))MVT&6!99B,,    patternnodemodulesc                 j   t          |j                  dk    rdS |j        d         |f}t          | |          D ]x\  }}t          |t          j                  s dS |j        dk    r dS t          |j        t                    s dS |j        |vr dS t          ||j                           |ur dS ydS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r   r   r   nodesexpected_typecurrent_nodes         r   matches_module_patternr.      s    
49~~u"&)A,!5E'*7E':': 
 
#|,00 	55?m++55,-s33 	55g--55+,--]BB55 C4r   
new_modulec                     t          | j        t                    sJ t          | j                  \  }}||| j        <   t	          ||         ||           d S N)r%   r   r)   r   setattr)r   r   r/   parent_namer   s        r   replace_node_moduler4   -   sW    dk3'''''$T[11K%GDKGK $
33333r   Fmodelc                 n   t           j        t           j        ft           j        t           j        ft           j        t           j        fg}|st          j        |           } |rt          | t          j        j                  st          j        |           }n| }t          |                                          }t          j        |j                  }|D ]}|j        D ]}t%          |||          rt'          |j        d         j                  dk    r7||j        d         j                 }	||j                 }
|
j        sdt1          |	|
          }t3          |j        d         ||           |                    |j        d                    |                    |           ƌt          j        ||          S )z
    Fuses convolution/BN layers for inference purposes. Will deepcopy your
    model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dcopydeepcopyr%   torchr&   GraphModulesymbolic_tracedictnamed_modulesgraphr+   r.   r"   r#   usersr   track_running_statsr   r4   replace_all_uses_with
erase_node)r5   inplaceno_tracepatternsfx_modelr   	new_graphr   r   convbn
fused_convs               r   fuserR   3   s   
 BN+BN+BN+-H  %e$$ :eUX-ABB $U++8))++,,Ghn--I + +O 	+ 	+D%gtW== 
+ty|)**Q..ty|23T[)- .tR88
#DIaL':FFF**49Q<888$$T***	+ >(I...r   c                     t          j        |           } G d dt          j         j                  } ||                                          S )z5
    Removes all dropout layers from the module.
    c                   P     e Zd Zdedeedf         deeef         def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r#   .kwargsr   c                     t          | j        |         t          j                  rt	          |          dk    sJ |d         S t                                          |||          S )Nr   r   )r%   
submodulesr7   Dropoutr"   superr!   )selfr   r#   rV   	__class__s       r   r!   z2remove_dropout.<locals>.DropoutRemover.call_moduleZ   sW    $/&12:>> A4yyA~~~~Awww**64@@@r   )__name__
__module____qualname__r   r	   r   r   r)   r   r!   __classcell__)r\   s   @r   DropoutRemoverrU   Y   s~        	Av 	AeHcM6J 	AUYZ]_bZbUc 	Ahk 	A 	A 	A 	A 	A 	A 	A 	A 	A 	Ar   ra   )r&   rB   r@   Transformer	transform)r5   rM   ra   s      r   remove_dropoutrd   S   si      ''HA A A A A- A A A >(##--///r   orig_moduler+   inputsoutputsc                 P   t          j                    }i |D ]!}|                    |j                  }||<   "|D ] }|                    |fd          }||<   !|                    fd|D                        |                                 t          j        | |          S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |          S r1    )xenvs    r   <lambda>z"extract_subgraph.<locals>.<lambda>l   s    s1v r   c                      g | ]
}|         S rj   rj   ).0outputrl   s     r   
<listcomp>z$extract_subgraph.<locals>.<listcomp>n   s    888fc&k888r   )r&   Graphplaceholderr   	node_copyrp   lintrA   )	re   r+   rf   rg   rN   inputnew_noder   rl   s	           @r   extract_subgraphrx   b   s     

I"$C  ((44E

  &&t-=-=-=-=>>D		8888888999NN>+y111r   c                 *    t          j        |           S r1   )	th_mkldnnMkldnnBatchNorm)a_s     r   rm   rm      s    !:1!=!= r   c                    i }| D ]}|j         dk    rt          |j        t                    sJ ||j                 }t	          |          t
          v rqt          t	          |                   |t          j                  }t          |t          j	                  sJ t          j        |          ||<   t          |||           |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r!   )r(   r%   r   r)   r*   
mkldnn_mapr@   floatr7   Moduler>   r?   r4   )r+   r   old_modulesr   
cur_moduler/   s         r   modules_to_mkldnnr      s     /1K ? ?7m##dk3///// -JJ:--'Z(8(89*ekRR
!*bi88888*.-
*C*CJ'#D':>>>r   r   c                     | D ]Q}|j         dk    rDt          |j        t                    sJ ||j                 }||v rt	          ||||                    RdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r!   N)r(   r%   r   r)   r4   )r+   r   r   r   r   s        r   reset_modulesr      su    
  L L7m##t{C00111 -J[((#D';z3JKKKL Lr   c                   $    e Zd Zdej        fdZdS )MklSubgraphfx_graphc                 >    || _         g | _        g | _        g | _        d S r1   )r   r+   start_nodes	end_nodes)r[   r   s     r   __init__zMklSubgraph.__init__   s#     $&
*,(*r   N)r]   r^   r_   r&   rr   r   rj   r   r   r   r      s1        + + + + + + +r   r   
   r   c                 H     dddt           dt          f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrE   r   c                    | j         }	:| j        j        	| j        j        t	          	                                         d |D             t          t          t          j	                 d | j
        D                       }t          	| j        ||          
fd} |fd          }t          j        j        t                                                                |fd          }||k     S )Nc                 @    g | ]}t          j        |j                  S rj   )r@   randnshapero   r   s     r   rq   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s$    IIITTZ00IIIr   c                 (    g | ]}|j         d          S )r   )r#   r   s     r   rq   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s    *T*T*TD49Q<*T*T*Tr   c                     t                    D ]} |              t          j                    }t                    D ]} |             }t          j                    |z
  S r1   )rangetime)fr}   beginoutiterswarmups       r   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark   sc    6]]  IKKE5\\  acc9;;&&r   c                  6    d  d  D              D             S )Nc                 6    g | ]}|                                 S rj   )to_densero   is     r   rq   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>   s     %o%o%oqajjll%o%o%or   c                 6    g | ]}|                                 S rj   )	to_mkldnnr   s     r   rq   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>   s!    GmGmGmZ[GmGmGmr   rj   sample_inputs	submodules   r   rm   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>   s-    %o%oIIGmGm_lGmGmGm<n%o%o%o r   c                         S r1   rj   r   s   r   rm   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>   s    		=(A r   )r   r   owning_moduler   r   	propagater   r   r&   r'   r   rx   r+   r   rE   rC   rD   )rE   input_nodesoutput_argsr   mkl_timeno_mkl_timer   r   example_inputsrM   r   r   r   s         @@r   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s   '~3H.4Kh)).999II[III4=*T*TEO*T*T*TUU$Xu{KUU		' 	' 	' 	' 	' 	' 9oooooppio+T)2I2I2K2K-L-LkZZZi A A A A ABB+%%r   )r   bool)r   r   r   r   rM   r   s   ``` @@r   gen_mkl_autotunerr      s[     HK& & & & & & & & & & & &0 r   rE   c                 2    t          | j                  dk    S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r"   r+   )rE   s    r   use_mkl_lengthr      s     u{ar   c                   @    e Zd Zd ZdefdZdedefdZdedefdZd	S )
	UnionFindc                 2    d g|z  | _         dg|z  | _        d S )Nr   r   size)r[   ns     r   r   zUnionFind.__init__   s     ,06A: !sQw			r   vc                 .    || j         |<   d| j        |<   d S )Nr   r   )r[   r   s     r   make_setzUnionFind.make_set   s    A	!r   r   c                     | j         |         }||k    r|S |J |                     |          | j         |<   t          t          | j         |                   S r1   )r   findr   int)r[   r   pars      r   r   zUnionFind.find   sO    k!n88H3ACQ(((r   r|   bc                     |                      |          |                      |          }}||k    r|S | j        |         | j        |         k     r||}}|| j        |<   | j        |xx         | j        |         z  cc<   d S r1   )r   r   r   )r[   r|   r   s      r   joinzUnionFind.join   sz    yy||TYYq\\166H9Q<$)A,&&aqAA	!	!$r   N)r]   r^   r_   r   r   r   r   r   rj   r   r   r   r      s        ' ' '#    )c )c ) ) ) )%c %c % % % % % %r   r   pass_configtracerc                    dddt           id}|i }|                    |           |d         rt          |           } |d         rt          |           } |d         du r| S t	          |d         t
                    st          d	          d|d         vrt          d
          |d         d         } |            }|                    t          j	        |                     t          j        |j                  }t          |                                           } G d dt                    }t          j                  D ]}	|j        }
|	j        dk    r||	j                 }t)          |          t*          v rq|j        }
t/          |                                d          }|F|j        t4          j        k    s
J d            |j        t5          j        d          k    s
J d            n6|	j        dk    r+|	j        t*          v r|j        }
n|	j        t:          v r|j        }
|
|j        k    r|
|j        k    r t?          d |	j         D                       s!                    |	          5  t          j"        |	j         fd          }ddd           n# 1 swxY w Y   tG          tH          t          j%        j&                 |          |	_         '                    |	          5  (                    dd|	f          }|	)                    |           |	f|_         ddd           n# 1 swxY w Y   tU          t          j                  |          }|_+        j        D ]}	|	j        dk    r|	j        dk    r|	j         d         }t          |	j,                  }|D ]B}|j        dk    r5|j        dk    r*|)                    |           -                    |           Ct]          |	j,                  dk    r-                    |	           t]          j                  }t_          |          fdta          j                  D ]
\  }}	|	j        dk    r(|	j        dk    r||	_1        2                    |           9|	j        dk    r?|	j        dk    r4 |	j         d                   J  |	j         d                   |	_3        fd|	j4        D             }t]          |          dk    rt?          d |D                       rJ tk          |          }|d         |	_6        |dd         D ]}7                    |d         |           tq          fd          }j        D ]}	ts          |	d          r8|:                    |	j6                           j        ;                    |	           ts          |	d          r8|:                    |	j1                           j<        ;                    |	           ts          |	d          r8|:                    |	j3                           j=        ;                    |	           |>                                D ]l} ||          s_|j<        |j=        z   D ]9}	|	j         d         }|	)                    |           -                    |	           :t          |j        ||           md}j        D ]}	|	j        dk    s|	j        dk    r|dz  }t          jA        t                    C                    d |           D                                 t          j        |           }|S )!a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuserd   mkldnn_layout_optimizeNr   rd   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZdS )*optimize_for_inference.<locals>.MklSupportr   r      N)r]   r^   r_   NOYESUNKNOWNrj   r   r   
MklSupportr     s        r   r   r!   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   ,   K   | ]}|j         d k    V  dS )r   N)r   )ro   args     r   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>7  s)      II3:3IIIIIIr   c                 2                         d| f          S )Nr   )call_method)r   r   s    r   rm   z(optimize_for_inference.<locals>.<lambda>:  s    h>R>RS^ab`e>f>f r   r   r   r   r   c                     t          | d          r                    | j                  S t          | d          r                    | j                  S d S )Ncolorstart_color)hasattrr   r   r   )r   ufs    r   	get_colorz)optimize_for_inference.<locals>.get_colorW  sT    1g 	$7717###1m$$ 	*771=)))tr   c                 p    g | ]2}t          |t          j                   |          ' |          3S r1   )r%   r&   r'   )ro   r   r   s     r   rq   z*optimize_for_inference.<locals>.<listcomp>r  sG    {{{1
STVXV]H^H^{bkbklmbnbnbz))A,,bzbzbzr   c              3      K   | ]}|d u V  	d S r1   rj   r   s     r   r   z)optimize_for_inference.<locals>.<genexpr>v  s&      9919999999r   r   c                  "    t                     S r1   )r   )r   s   r   rm   z(optimize_for_inference.<locals>.<lambda>}  s    H@U@U r   r   r   	end_colorzmkldnn conversions: %s)Er   updaterR   rd   r%   rC   RuntimeErrortracer>   r?   r&   rA   rootrD   r   listr+   r   r(   r   r*   mkldnn_supportedr   next
parametersdtyper@   r   devicemkldnn_supported_unknownr   anyr#   inserting_beforemap_argr   r	   r   r   inserting_aftercreate_noderH   r   r   rF   rI   r"   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerr]   inforu   )r5   r   r   default_pass_configr   
cur_tracerrM   r   r   r   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderF   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrE   prvmkldnn_conversionsresultr   r   r   s                               @@@r   optimize_for_inferencer     sJ   & #."? 
 {+++>* U+, &u%%34==)*BCTJJ JHIII-.FGGGQRRR+,DEkRJe 4 455H~jox88H$()<)<)>)>$?$?G    T    X^$$ ' '$-7m## -JJ#333",.#'
(=(=(?(?#F#F #/+1U[@@@Bm@@@+2el56I6IIIIKnIIIW''{...",. 888","4jm++*"444IItyIIIII **400 h h j4f4f4f4fggh h h h h h h h h h h h h h h U27#34kBBDI))$// ' '"..}j4'RR**7333 $w' ' ' ' ' ' ' ' ' ' ' ' ' ' ' $D$8$8'BBK&H  	* 	*7m##z(A(Ay|H$$E . .7m++{0J0J..x888''---4:!####D))) HN##I	9		B    & #8>22 4 47m##{(B(B&DKK    W%%$+*C*C9TYq\**666&Yty|44DNN{{{{0D{{{J:!##99j99999999
++J#ADJ)!""~ 4 4
1{33334 -88U8U8U8U,V,VM J J4!! 	B"''$*--.4;;DAAA4'' 	N"''$"2334@GGMMM4%% 	J"''$.112<CCDIII
 %%'' = =  '' 	=)EO; * *il**3///##D))))%+w<<< $ $;+%%
)B)B!#h$$%=?QRRRMMOOO^E8,,FMs$   4JJ"	%J"	-6L//L3	6L3	)FF)r   r   )Itorch.fxr&   torch.fx.noder   r   torch.nn.utils.fusionr   typingr   r   r   r	   r
   r   r   r   r@   torch.nnr7   torch.nn.functional
functionalFtorch.fx.passes.shape_propr   r>   collectionsr   torch.utils.mkldnnutilsmkldnnrz   operatorr   r   enumr   r)   r   r'   r.   r   r4   rR   rd   rx   r:   Linearr;   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr  rj   r   r   <module>r,     sb         * * * * * * * * 3 3 3 3 3 3 I I I I I I I I I I I I I I I I I I I I                 0 0 0 0 0 0  # # # # # # & & & & & & & & &         -# -%S/ - - - -HTN "' DQTVYQYN    $4bg 4S#X 4EHO 4 4 4 4/ / /58? / / / /@0") 0	 0 0 0 02") 2DM 24PRPW= 2cghjhocp 2 2 2 2" Iry"."'2<rOc	JFAL!/  %L(,7 Iy%Iy%N==
T"'] T#ry.5I    $
Lbg 
Lc29n1E 
LTXY[YbdfdmYmTn 
L 
L 
L 
L+ + + + + + + +$ $ $ $L +  $        % % % % % % % %8 -1 ii i8?i$sCx.)i Oi X_	i i i i i ir   