
    קg6                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlmZ d dlmZ d dlmZ dd	lmZmZmZ dd
lm Z  ddl!m"Z"m#Z#m$Z$  ej%        e&          Z'd Z(ed             Z)edej*        de
fd            Z+d"dZ,d Z-edej*        de
fd            Z. G d dej/                  Z0edej*        de
fd            Z1ed             Z2d Z3ej4        j5        Z5e5j6        e5j7        e5j8        e5j9        e5j:        e5j;        e5j<        e5j=        e5j>        e5j?        e5j@        e5jA        e5jB        e5jC        jD        e5jC        jE        e5jF        e5jG        e5jH        e5jI        e5jJ        e5jK        e5jL        hZM eeM          ZMed             ZNdee
ejO        f         fdZPd ZQd aRd ZSd ZTd#d!ZUdS )$    N)contextmanager)partial)CallableUnion)SymInt)get_decompositions)bind_symbols   )aot_function
aot_modulemake_boxed_compiler)strip_overloads)default_partition
draw_graph#min_cut_rematerialization_partitionc                     | j                             dt          j        j        j                  D ]}t          j        j        j        |_        |                                  | S )Ncall_functionoptarget)	graph
find_nodestorchopsaten_to_copytor   	recompile)fx_gnodes     V/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_functorch/compilers.py_canonicalizer"   $   s[    
%%59>#: &   ( ( in'NNK    c               #      K   t           j                            d          } 	 d V  t           j                            |            d S # t           j                            |            w xY w)NF)r   _C_jit_set_autocast_mode)old_jit_autocast_flags    r!   _disable_jit_autocastr(   -   sd      !H;;EBB?''(=>>>>>''(=>>>>s   A !A)r   returnc                    t                      5  t          |            | j                            dt          j        j        j                  D ]V}t          |j	                  dk    r<t          |j
                  dk    r$d|j
        v rt          j        j        j        |_        W| j        j        D ]P}i }|j
                                        D ]+\  }}t          |t          j                  r|j        }|||<   ,||_
        Q| j                                         |                                  t          j                            |           }t          j                            |j                   t          j                            |                                          }t          j                            |          }t7          d |D                       s ||  ddd           n# 1 swxY w Y   |S )a  
    Compiles the :attr:`fx_g` with Torchscript compiler.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fx_g(fx.GraphModule): The input Fx graph module to be compiled.

    Returns:
        Torch scripted model.
    r   r   r
   dtypec              3   T   K   | ]#}t          |t          j        j                  V  $d S N)
isinstancer   _subclasses
FakeTensor).0ts     r!   	<genexpr>zts_compile.<locals>.<genexpr>`   s2      MM1:a!2!=>>MMMMMMr#   N)r(   r   r   r   r   r   r   r   lenargskwargsr   r   nodesitemsr.   devicetypelintr   jitscriptr%   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r   inpsr    
new_kwargskvfs          r!   
ts_compilerH   6   s    
	 	   J))uy~'> * 
 
 	0 	0D 49~~""s4;'7'71'<'<DKAWAW#in/J$ 	% 	%DJ))++ " "1a.. A !
1$DKK
IT""**17333IQVVXX&&I,,Q//MMMMMMM 	AtHH9              : Hs   GG--G14G1Tc                 R    t          | j                   t          | ||           | S )N)
clear_meta)printcoder   )r   _namerJ   s       r!   _draw_graph_compilerO   e   s,    	$)tTj1111Kr#   c                 H    t          t          t          |                     S )NrN   )r   r   rO   rQ   s    r!   draw_graph_compilerR   k   s    w':FFFGGGr#   c                     | S )z
    Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
    and can be used to check accuracy.

    .. warning::
        This API is experimental and likely to change.

     r   rM   s     r!   noprV   o   s	     Kr#   c                   (     e Zd Z fdZ fdZ xZS )DebugInterpreterc                 b    t          | j        g|R  | _         t                      j        |  d S r-   )r	   modulesymbol_mappingsuperrun)selfr5   	__class__s     r!   r]   zDebugInterpreter.run}   s6    *4;>>>>Tr#   c                 P   
  fdfdfd

fd}t                                          |          }d|j        v rt          j        |j        d                   \  }}t          j        |          \  }}t          |          t          |          k    s)J t          |           dt          |                       t          t          t          |                    ||          D ]2\  }}	t          |	t          j
                  s! |||	 fd           3|S )Nc                     t          | t                    s| S t          j        | j        j                            j                            }|j        s
J |            t          |          S r-   )
r.   r   sympyexpandr    exprxreplacer[   	is_numberint)nirr^   s     r!   subst_symintz/DebugInterpreter.run_node.<locals>.subst_symint   s_    b&)) 	RW\2243FGGHHA;!!!!!q66Mr#   c                 :    t          fd| D                       S )Nc              3   .   K   | ]} |          V  d S r-   rT   )r1   rh   rj   s     r!   r3   zHDebugInterpreter.run_node.<locals>.subst_symint_tuple.<locals>.<genexpr>   s-      88bb))888888r#   )tuple)nisrj   s    r!   subst_symint_tuplez5DebugInterpreter.run_node.<locals>.subst_symint_tuple   s&    8888C888888r#   c                 ,    |                                            dk    rqt          | j                  D ]\} |                     |                    |                    |          k    r% |                     |                    dk    r dS ]dS )Nr   r
   FT)numelrangendimstridesize)abidxrj   s      r!   check_significant_stridesz<DebugInterpreter.run_node.<locals>.check_significant_strides   s    |AGGII&&** == % %C$QXXc]]33qxx}}DD(L5599$uu4r#   c           	         t          |          sJ | j        |j        k    s$J  |             d| j         d|j                      |                                           |                                k    s\J  |             d|                                  d |                                            d|                                              | |          }|s\J  |             d|                                  d |                                            d|                                             d S )Nz:  != z aka )callabler+   ru   rt   )nvrvdescsame_stridesry   ro   s       r!   checkz(DebugInterpreter.run_node.<locals>.check   s`   D>>!!!8rx'''DDFF)N)Nbh)N)NBH)N)N'''""27799--:::$&&ZZBGGIIZZ,>,>rwwyy,I,IZZrwwyyZZ ;::44R<<La a$&&``BIIKK``.@.@.M.M``SUS\S\S^S^``a a a a ar#   valr{   c                      d  dj          S )Nzoutput z where )r[   )ir^   s   r!   <lambda>z+DebugInterpreter.run_node.<locals>.<lambda>   s    &O&O&O$:M&O&O r#   )r\   run_nodemetapytreetree_flattenr4   ziprr   r.   r   Tensor)r^   nr   ri   n_valsn_specr_valsr_specr}   r~   ry   r   rj   ro   r_   s   `         @@@@r!   r   zDebugInterpreter.run_node   s{   	 	 	 	 		9 	9 	9 	9 	9	 	 	 	 			a 		a 		a 		a 		a 		a GGQAF??#0??NFF#033NFF v;;#f++---#f++/P/P3v;;/P/P--- s6{{!3!3VVDD Q Q	2r!"el33 b"OOOOOPPPPr#   )__name__
__module____qualname__r]   r   __classcell__)r_   s   @r!   rX   rX   |   sQ            / / / / / / / / /r#   rX   c                 *    t          |           j        S )z
    Returns a (slow) interpreter over the FX graph module that also checks
    various debugging properties (e.g., that tracing strides matched real
    strides.)
    )rX   r]   rU   s     r!   	debug_nopr      s     D!!%%r#   c                     t          |            t          j                            |           }t          j                            |                                          }|S r-   )r   r   r<   r=   r?   r@   )r   rM   rG   s      r!   simple_ts_compiler      sG    D	A	""AHr#   c                 ,    t          | t                    S r-   )r   r   )rG   s    r!   nnc_jitr      s    ,---r#   c                 .    t          | j                   | S r-   )rK   rL   rU   s     r!   print_compiler      s    	$)Kr#   fnc                     t           t           t          t          d}|                    |           t	          | t
          j        j                  rt          | fi |S t          | fi |S )a  
    Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
    memory efficient fusion. It uses the
    :func:`min_cut_rematerialization_partition` partitioner to perform efficient
    recomputation. It uses NVFuser to compile the generated forward and backward
    graphs.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
            that takes one ore more arguments. Must return one or more Tensors.
        **kwargs: Any other overrides you want to make to the settings

    Returns:
        Returns a ``Callable``  or ``nn.Module`` that retains the eager behavior
        of the original :attr:`fn`, but whose forward and backward graphs have
        gone through recomputation optimizations, and the graphs have been
        compiled with nvfuser.

    fw_compilerbw_compilerpartition_fndecompositions)
rH   r   default_decompositionsupdater.   r   nnModuler   r   )r   r6   configs      r!   memory_efficient_fusionr      sr    6 "!;0	 F MM&"eho&& *"'''''B))&)))r#   c                     |                      d           t          dd |D              d           ddlm}   |                                            |  t          | |          S )NfooaQ  
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = c                 *    g | ]}|j         |j        fS rT   )shaper+   )r1   r   s     r!   
<listcomp>z!debug_compile.<locals>.<listcomp>  s!    ***!'17	***r#   a?  
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
r   )FxModule)	to_folderrK   r   r   cudarH   )r   rC   r   s      r!   debug_compiler     s    NN5		 	+*T***	 	 	  ( HHJJOOtdD!!!r#   c                 6   g }t          | d          5 }t          j        |          }g }|D ]}t          |          dk    r|} |t	          j                              }n|\  }}}}	}
|	t          j        t          j        t          j	        t          j
        t          j        t          j        t          t          hv rt          j        dd||	|
          }nt          j        ||	|
          }|                    |           	 ddd           n# 1 swxY w Y   |S )zZ
    Return a random input for the given inputs meta generated from _save_fx_default.
    rbr
   r   )r+   r9   N)openpickleloadr4   randomrandr   rg   int32int64booluint8floatrandintappend)input_data_pathinputsrG   inputs_metar   r:   inputr   rt   r+   r9   s              r!   
get_inputsr   3  sR    F	ot	$	$ !k!nn 	! 	!D4yyA~~V[]]++592eVUFIKKJIK	 	 	 "M!QU6RRREE!JuE&IIIEMM%    '	!! ! ! ! ! ! ! ! ! ! ! ! ! ! !. Ms   C.DDDc                 |    	
 ddl m} 	fd	 	fd

fd}
fd}
fd} ||||||t                    S )	aO  
    The forward, backward, and joint computation graph will be stored in
    {folder_name}/{current_name}/{current_name}_forward_{graph_index},
    {folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
    {folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
    The input shape of the graphs will be stored in the .input files.
    These files can be loaded with pickle,
    and is a list of format (type, shape, stride, dtype, device).
    In the case of type = int or float, it is just (type,).
    For joint graph input, it is a nested list [[],[]]
    where the two inner lists have the same format.
    If dump_example_input is True, example_inputs will be stored in .pt file.
    Since each function might produce multiple graphs,
    the graph_index is used to distinguish difference graphs
    r   )aot_module_simplifiedc                    g }t          |           dk    rEt          | d         t                    r*| | d                   z  }| | d                   z  }|S | D ]}t          |          t          k    st          |          t
          k    r$|                    t          |          f           V|                    t          |          |j        |                                |j	        |j
        f           |S )Nr   r
   )r4   r.   rm   r:   rg   r   r   r   rt   r+   r9   )r5   
input_metaargget_input_metas      r!   r   z(_save_fx_default.<locals>.get_input_metad  s    
t99q==ZQ77=..a111J..a111J 	 	CCyyC499#5#5!!499,////!!#YY	3::<<CJO    r#   c                    t          | j        j                  dk    r/t                              t          j        d|t                     d S t          j        |           }|j        	                    t          j        j                                                   |                                  |          }t          j         d d           |                     d d d| dt           	           t#          j        |t'           d d d| dt           d d| dt           dd                     r9t          j        | d d d| dt           d d| dt           d	           d S d S )
Nr   z!No nodes in graph {%s}_{%s}_{%s}./T)exist_okrM   z.inputwbz.pt)r4   r   r7   logloggingWARNINGgraph_indexcopydeepcopyset_codegenr   fxCodeGenr   osmakedirsr   r   dumpr   save)	
gm_to_saver5   	type_namegmr   current_namedump_example_inputfolder_namer   s	        r!   graph_saver_helperz,_save_fx_default.<locals>.graph_saver_helpers  sO   z%&&!++GG3   F]:&&
UX^3355666
#^D))

{33\33dCCCC
TT\TTLTT9TT{TT	
 	
 	
 	  H  H  H  H  H  Hy  H  H;  H  HYe  H  Hhq  H  Ht  H  H  H 	
 	
 	
  	J  E  E  E  E  E  Ey  E  E;  E  EYe  E  Ehq  E  Et  E  E  E    	 	r#   c                 "     | |d           | S )NforwardrT   )r   fw_argsr   s     r!   graph_saver_forwardz-_save_fx_default.<locals>.graph_saver_forward  s    2w	222	r#   c                 6     | |d           t           dz  a | S )Nbackwardr
   )r   )r   bw_argsr   s     r!   graph_saver_backwardz._save_fx_default.<locals>.graph_saver_backward  s'    2w
333q	r#   c                 >     | |d           t          | |          S )Njoint)r   )r   
joint_argsr   s     r!   graph_saver_jointz+_save_fx_default.<locals>.graph_saver_joint  s(    2z7333 Z000r#   r   )functorch.compiler   r   )r   r   r   r   example_inputsr   r   r   r   r   r   s   ```      @@r!   _save_fx_defaultr   R  s      877777    ! ! ! ! ! ! ! !F        1 1 1 1 1 ! 
'(&-   r#   Fc                 4    da t          t          | ||          S )as  
    Dump the forward, backward, and joint computation graph.
    Example Usage:
    save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
    optimize_ctx = torchdynamo.optimize(
        save_fx_func
    )
    with torch.enable_grad():
        with optimize_ctx:
            result = forward_and_backward_pass(model, example_inputs)
    r   )r   r   r   )r   r   r   s      r!   graph_dumper_aotr     s     K#\;@RSSSr#   )T)F)Vr   r   r   r   r   
contextlibr   	functoolsr   typingr   r   rb   r   torch.fxr   torch.nnr   torch.utils._pytreeutils_pytreer   r   torch._decompr   %torch.fx.experimental.symbolic_shapesr	   aot_autogradr   r   r   compile_utilsr   partitionersr   r   r   	getLoggerr   r   r"   r(   GraphModulerH   rO   rR   rV   InterpreterrX   r   r   r   r   r   detachgelu_backwardleaky_relu_backwardsigmoid_backwardthreshold_backwardhardtanh_backwardhardsigmoid_backwardhardswish_backwardtanh_backwardsilu_backwardelu_backwardcudnn_batch_normcudnn_batch_norm_backwardmasked_fillScalarr   elu
leaky_reluhardtanh	hardswishhardsigmoidconj_physicalis_same_sizer   r   r   r   r   r   r   r   r   rT   r#   r!   <module>r     s     				   % % % % % %       " " " " " " " "               $ $ $ $ $ $ $ $ $       , , , , , , > > > > > > G G G G G G G G G G * * * * * *          g!!
   ? ? ? +R^ +h + + + +\   H H H 	bn 	H 	 	 	 	4 4 4 4 4r~ 4 4 4n &BN &( & & & &   . . . y~K"HOMN- 2 ,+,BCC    
$*h	!"$* $* $* $*N" " ":   >Y Y YzT T T T T Tr#   