
    קg>             	       v   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlm Z m!Z! d dl"m#Z# d d	l$m%Z%m&Z& d d
lm'Z'm(Z( d dl)m*Z* d dl+m,Z, d dlm-Z- d dl.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC ddlDmEZEmFZF ddlGmHZHmIZImJZJmKZKmLZLmMZM ddlNmOZOmPZPmQZQmRZR ddlFmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[ ddl\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZd ddlemfZf ddlgmhZh ddlimjZjmkZkmlZlmmZmmnZnmoZo ddlpmqZqmrZr erd d lsmtZt dd!lumvZv d d"lwmxZx  ejy        ez          Z{ej|        }                    ezd#          Z~ej        j        Z ej                    Z eEj                    rd d$lmZ n
d%ed&ed'dfd(Zd)ej         d*ed'efd+Zd,ej        d'eej                 fd-Zd.ed'efd/Zd0e-d1ed'ee!ej        j        e-f         fd2Zd3e;d4eeedf                  d'dfd5Z G d6 d7ej        j                  ZdS )8    N)defaultdict)contextmanager)
ModuleType)AnyCallableDefaultDictDictIterableListNoReturnOptionalSequenceTupleTYPE_CHECKINGUnion)Expr)deviceTensor)get_decompositions)defakedynamo_timed)
LazyStringtrace_structured)make_channels_last_strides_for)
FakeTensor)GraphModule)BackwardState)magic_methodsmethod_to_operator)free_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSymTypes)Graph)Node)no_dispatch)
OrderedSet)int_oo   )configir)BackendFeatureDeviceOpOverridesget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registration)CppWrapperCodeGenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)	ConstantFixedLayoutget_device_typeInputBuffer	Pointwise	Reduction
StorageBox	TensorBoxTorchBindObject)FALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsunsupported_output_tensor)BaseSchedulerNode)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_sympy_Expr_dtype#maybe_get_suppress_shape_guards_ctxshould_assume_input_aligned)NullHandlerV)_EffectType)WrapperCodeGen)output_code_log
perf_hints)log_module_codeargskwargsreturnc                      d S N )rX   rY   s     Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_inductor/graph.pyrW   rW   {   s        dtypecudac                 (   t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j	        t           j
        t           j        t           j        t           j        h}|r||                    t           j                   |                    t           j                   |                    t           j                   |                    t           j                   | |v S r\   )torchfloat32float64int64int32int16int8uint8boolbfloat16	complex32	complex64
complex128float16addfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuz)r`   ra   supported_dtypes      r^   supported_dtype_of_cpp_wrapperrw      s    

O  3E/000E-...E1222E1222O##r_   constant_bufferc                    t          | t          j        t          j        t          j        j        j        f          s
J d            t          | t          j        j        j                  rt          j        S t          | t          j                  rt          |           S | j
        rt          j        S | j        rt          j        S d S )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr   corenumbersIntegerrc   rf   rN   
is_integeris_floatrd   )rx   s    r^   may_get_constant_buffer_dtyper      s    %,
EJ4F4NO  q qpq q q /5:#5#=>> {/5:.. 5#O444! {		! }tr_   opc                 ,    d t           D             }| |v S )Nc                 ,    h | ]}t          |          S r]   )r   ).0ms     r^   	<setcomp>z"is_magic_method.<locals>.<setcomp>   s!    >>>1#A&&>>>r_   )r   )r   	magic_opss     r^   is_magic_methodr      s    >>>>>I?r_   objtargetc           	          |                     d          }| }t          |          D ]R\  }}t          ||          s-t          dd                    |d |                              t          ||          }S|S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)r   r   target_atomsattr_itriatoms         r^   getattr_recursiver      s     <<$$LH\** + +4x&& 	Rchh|BQB?O6P6PRR   8T**Or_   guser_visible_outputsc                    t           j        sdS t          j        t          j        h}t          j        t          j        t          j        t          j        t          j	        t          j
        t          j        t          j        t          j        t          j        t          j        t          j        h}dt"          j        j        dt(          t"          j        j                 fd}t/          | j                  D ]} ||          }|s||v r
d|j        d<   |j                            dd          r&|j        D ]} ||          }|s||vr
d|j        d<   t           j        s|r|j        |v r
d|j        d<   dS )a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    NnoderZ   c                     | j         dk    rEt          | j        t          j        j                  r!t          | j        d          r| j        j        nd S )Ncall_function_overloadpacket)r   rz   r   rc   _ops
OpOverloadr   r   )r   s    r^   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet   sX    
 w/))4;
(=>> * %677 * K''
 	
r_   Tdislike_paddingF)r,   comprehensive_paddingatenconvolutionconvolution_backwardvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducerc   fxr'   r   r   OpOverloadPacketreversednodesmetagetall_input_nodespad_outputsname)	r   r   ops_dislike_paddingops_like_paddingr   curr   priorprior_ops	            r^   mark_nodes_dislike_paddingr      s    ' ! 					

hm

	%*-	.

 

 

 

    / /!!#&& 	$$$*.CH&'8<<)511 	9, 9 9//66 #33348EJ01 "	/$	/ 000*.CH&'+/ /r_   c            !       	    e Zd ZU eej                 ed<   dej        de	e
ee         ee         f         e
ee         ee         f         f         fdZdej        de	eej                 eej                 f         fdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 dcdej        j        d	eeej                          d
ee         dee         dededeeedf                  dee         deeeej                 gef                  dededeeeef                  dee         ded          dee         ddf  fdZde
ej        j        j        ef         dedefdZedededefd            Z dedefdZ!dej        j        d	eej                 dedd fdZ"de#e$         fdZ%deddfd Z&dej        ddfd!Z'e(dej)        j*        j+        fd"            Z,d#edee
ej-        ej.        f                  fd$Z/d#ede
ej-        ej.        f         fd%Z0d#edej1        fd&Z2d#ede
eef         fd'Z3d(edef fd)Z4d*ej5        defd+Z6dd,d-ej.        d.edefd/Z7d0ee         defd1Z8d2e
e9ej                 ej        f         ddfd3Z:deddfd4Z;dedej        fd5Z<dee         d6e
e         defd7Z=	 ddd6edee         de-fd8Z>ded9eej                 defd:Z?d;ed(e	e@         d<eee@f         de
ee-df         f fd=ZAd;ed(ed<eeef         def fd>ZBed?ej        defd@            ZCd;ed(e	dA         d<eee@f         de
eDe-ejE        eFf         fdBZGd;ed(ed<edeHfdCZId;ed(ed<edeHfdDZJd;ed(e	e@         d<eee@f         ddf fdEZKdedFZLeMdGej        j$        fdH            ZNdIe
ej-        ejO        f         dJe	e
eejP        f         dKf         de
ej-        ejO        f         fdLZQdMej        j$        dNe	e         dOeeef         dPe	e         dQeeef         ddfdRZRdSej        j$        de@f fdTZSdedUZTdedVZUde	eee	ee$f                  f         fdWZVde	eee	ee$f                  f         fdXZWdfdZZXde	eee	eYef                  ee	eYeZf                  f         fd[Z[ed\eddfd]            Z\de]fd^Z^de]fd_Z_defd`Z`dee         fdaZadedefdbZb xZcS )gGraphLoweringgraph_outputsexrZ   c                 `   | j         rBt          |                                          t          |                                          fS ddlm}  |dt          | j        j                             }| j        	                    ||          \  }}}d |D             }d |D             }||fS )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_c                 ^    g | ]*}t          |t          j                  r|j        j        n|+S r]   rz   rc   SymIntr   exprr   r   s     r^   
<listcomp>z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>&  s1    PPPaz!U\::APPPr_   c                 ^    g | ]*}t          |t          j                  r|j        j        n|+S r]   r   r   s     r^   r   z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>'  s1    TTTAu|!<!<C!&++!TTTr_   )
reuse_shape_envrK   sizestridetorch._dynamo.sourcer   len
_shape_env
var_to_val,create_symbolic_sizes_strides_storage_offset)selfr   r   sourcer   r   _s          r^   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides  s      	,RWWYY779R		: :   <;;;;; $^NS1K-L-LNN F LL 	 QP4PPPTTVTTTV|r_   c                     d |                                 D             }d |                                D             }||fS )z+
        Primarily used to weights
        c                 6    g | ]}t          j        |          S r]   r{   r   r   s     r^   r   z6GraphLowering.static_sizes_strides.<locals>.<listcomp>0  s"    444Qa  444r_   c                 6    g | ]}t          j        |          S r]   r   r   s     r^   r   z6GraphLowering.static_sizes_strides.<locals>.<listcomp>1  s"    888q%-""888r_   )r   r   )r   r   r   r   s       r^   static_sizes_stridesz"GraphLowering.static_sizes_strides*  sD     54"''))44488BIIKK888V|r_   NFgmexample_inputs	shape_envgraph_idcpp_wrapperaot_moder   
layout_optextern_node_serializeris_inferenceis_const_graphconst_output_index
const_codeconst_moduler   c                    t                                          |           || _        ||n|                     ||
          | _        d| _        |
| _        || _        || _        || _	        d| _
        |t                      }d| _        n|| _        d| _        || _        |                                 |j                                        | _        t%                      | _        t)          |          | _        g | _        i | _        i | _        t%                      | _        |r|j        nt%                      | _        |r|j        nt%                      | _        d| _        g | _        g | _        |r|ni | _        |r!t%          |                                           nt%                      | _!        |r|j"        ni | _"        i | _#        i | _$        t%                      | _%        t%                      | _&        t%                      | _'        t%                      | _(        t%                      | _)        t%                      | _*        d | _+        d | _,        g | _-        ddl.m/} ta          j1                    r|	r|	n|| _2        d | _3        i | _4        t%                      | _5        g | _6        i | _7        tq          tr                    | _:        i | _;        ty          j<                    | _=        || _>        || _?        || _@        i | _A        || _B        || _C        t          t                    | _F        d | _G        | j        r| H                                nt%                      | _I        dh| _J        ||ni | _K        t          |jM        |           d| _N        d| _O        g | _P        d | _Q        i | _R        |S                                | _T        | jU        jV        W                    di           | _X        ||jY        ni | _Y        t                        t          j\        d           t                    | _]        i | _^        t%                      | __        t%                      | _`        t%                      | _a        d S )	N)r   r   FT)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)bsuper__init__r   decide_layout_optr   num_channels_last_convr   r   r   r   extra_tracebackr$   r   r   freeze_runtime_assertsdeferred_runtime_assertscopyras_by_symbolr)   bound_unbacked_symbolsrJ   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalzero_dim_cpu_tensor_listdevice_typesdevice_idxsra   buffers
operationsr   keysfolded_constants	constantstorchbind_constantsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_buffersnever_reuse_buffersinplaced_to_remove
device_opswrapper_codeextern_kernel_nodes&torch._inductor.extern_node_serializerr   r,   	is_fbcoder   current_nodelistsmutated_inputsmutated_input_idxsname_to_bufferr   listname_to_users
name_to_optimecreation_timer   r   record_multi_kernel_choicemulti_kernel_to_choicer   r   next_post_grad_graph_counterpost_grad_graph_id	schedulerfind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   r   graph	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler   r   r   allocated_constant_namer3   	functools	lru_cacher0   effectful_opsaligned_inputsno_fuse_buffer_namesall_codegen_kernel_names)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                    r^   r   zGraphLowering.__init__4  s`   ( 	, % J'''FF 	
 '(#(,$($ 

I#(D  'DO#'D # 	((*** .3355 	 AK#(33,.24=?"9C%)5GL%%:<< 	 )5FL$$*,, 	 	(*.0"4<" 	
 "J)..00111 	 '3:L"" 	 FH .03=<<0:8B$0:4>LL 3=<<-1,0>@ VVVVVV !!-&<-"", 	# ,0+-
/9||-/46@KD@Q@Q35!Y[[	&
 +6'68#  "&'?"@"@>B6:oWD00222:<< 	' "= =$8$D  " 	! 	#28-ABBB !  	 9=& GI -/[[]]040@0D0D.1
 1
- 5A4LL00RT 	$ 	"###$=I$7$=$=>R$S$S!;=/9||5?\\! :D%%%r_   r   featurec                     t          |t                    s
J |            ||                     t          |                    v S r\   )rz   r.   r0   r:   )r   r   r;  s      r^   has_featurezGraphLowering.has_feature  sC     '>22;;G;;;$33OF4K4KLLLLr_   c                :   t           j        sdS t           j        rdS d | j        j        D             }t          |          }|dk    rdS t          j        j        j	        r>t          j        j        
                                rt          d |D                       rdS t          t          | j        j                            d|z  k    rt                              d           dS t          d |D                       rt                              d	           dS d
t           dt"          fd}d
t          j        j        dt"          fd}d
t          j        j        dt"          fd}|rddlm} t-          t.                    }|D ]}	t          j        j                            |	          \  }
}}|
r |d          5 }t6          j        5   |	j        |i | ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                                } ||	          rd}n ||	          rd}n ||	          rd}nd}||xx         |z  cc<   t                              d           d}d}d}d}t?          |                                           }|d         |z  |d         |z  z   |d         |z  z   |d         |z  z   }||k    }|st                              d||           |S t          tC          ||                    rt                              d           dS t          tC          ||                    rt                              d           dS t          tC          ||                    rt                              d           dS dS )zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTc                 Z    g | ](}|j         t          j        j        j        j        k    &|)S r]   )r   rc   opsr   r   default)r   ns     r^   r   z3GraphLowering.decide_layout_opt.<locals>.<listcomp>  s5     
 
 
UY^5O5W)W)WA)W)W)Wr_   r   c              3      K   | ]<}d D ]7}|j         |         j        d         j        t          j        d          k    V  8=dS )r   r+   valcpuN)rX   r   r   rc   r   rB  idxs      r^   	<genexpr>z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sm        !   s '.%,u2E2EE      r_   i,  z*Skipped layout opt because only a few convc              3   j   K   | ].}d D ])}t          |j        |         j        d                   V  */dS )rD  rE  N)r!   rX   r   rG  s      r^   rI  z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sc       
 

 
  QVC[-e455
 
 
 
 
 
 
r_   zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670rB  rZ   c                     | j         d         j        d         }t          |t          j                  sJ | j         d         dk    o|                    d          dk    S )Nr+   rE  )rX   r   rz   rc   r   r   )rB  meta_vals     r^   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_grouped  sR    vay~e,Hh555556":>:hmmA&6&6&::r_   c                    | j         d         j        d                             d          dz  | j         d         j        d                             d          k    o.| j         d         j        d                             d          dk    S )Nr+   rE  r      rX   r   r   rB  s    r^   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel	  ss    q	u%**1--1QVAY^E5J5O5OPQ5R5RR 6F1IN5)..q11A5r_   c                     | j         d         j        d                             d          dk    o.| j         d         j        d                             d          dk    S )Nr+   rE  r   @   rQ  rR  s    r^   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channel  sR    q	u%**1--3 8F1IN5)..q11R7r_   )FlopCounterMode)displayNgroupedsmallin_outrA  zConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)"r,   layout_optimizationforce_layout_optimizationr*  r   r   rc   backendsmkldnnenabledis_availableallr  logdebugr   r   rk   r   r'   torch.utils.flop_counterrW  r   float	_inductorfx_utilsget_fake_args_kwargsrR   	fake_moder   get_total_flopsr   valuesmap)r   r   
conv_nodesnconvrN  rS  rV  rW  flop_countsr   successrX   rY   flop_counter_modecounted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                          r^   r   zGraphLowering.decide_layout_opt  s	    ) 	5+ 	4
 
x~
 
 

 JA::5 N!)		%2244		   #    		 4
 tBHN##$$e33IIBCCC5 
 

 
 
 
 
 	
 IIw   5	;# 	;$ 	; 	; 	; 	;
	 	4 	 	 	 		 	$ 	 	 	 	  3	!@@@@@@,7,>,>K" < <(-(@(U(U) )%v  <(777 9;L[ 9 9'DK88889 9 9 9 9 9 9 9 9 9 9 9 9 9 99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 %6$E$E$G$GM!z$'' .$-		))$// .$+		**400 .$,		$-		***m;****II:;;;;
 "'!& %$k002233K I&);;g&)99:h'*;;< i(+==>  +k9M  		~"  
 ! & s:z**++ 	IIX   5 s$j1122 	IIT   5 s#Z0011 	IIVWWW5ts6   =G;
G$G;$G((G;+G(,G;;G?	G?	c                 ,    | j         | j          d| S |S )z2Prepend the given name with the graph name if any.Nr   r   r   r   s     r^   qualify_namezGraphLowering.qualify_namew  s%    9 i(($(((r_   subgraph_namec                     t          ||| j        | j        | j        | j        | j        |                     |                    S )a  
        Make a subgraph of the current graph with all inherited
        parts, except the graph module (`gm`) and `example_inputs`.
        The subgraphs are lowered separately, but intended to be
        inlined in the parent graph's codegening. Hence the need
        for maintaining the same `shape_env` and other properties.
        The subgraph name is qualified by the parent graph's name.
        )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r  )r   r   r   r  s       r^   make_subgraphzGraphLowering.make_subgraph}  sN     )o(]#'#>*""=11	
 	
 	
 		
r_   c                    t                      }t          | j        j        j                  D ]a}|j        t          j        j        j	        j
        k    r|                    |           <|j        D ]}||v r|                    |            nb| j        j        j        D ] }||v r|                    |j                   !|S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        )r)   r   r2  r*  r   r   rc   r@  r   r   rA  rq   usersupdate)r   
output_setrB  users       r^   r'  z-GraphLowering.find_nodes_prefer_channels_last  s    & (2||
$++122 	 	Ax59>5===q!!!  :%%NN1%%%E &$ "( 	+ 	+AJ!!!'***r_   c                     || j         vr7| j                             |           t                              d|           d S d S )NzUsing FallbackKernel: %s)r)  rq   perf_hint_loginfor~  s     r^   warn_fallbackzGraphLowering.warn_fallback  sL    t,,,!%%d+++94@@@@@ -,r_   c                     | j                             |j                   |j        | j                            |j                   t
          j        j        r$|| j        vrt
          j        j        | j        |<   d S d S d S r\   )	r  rq   typeindexr  rR   r*  r  r/  )r   r   s     r^   add_device_infozGraphLowering.add_device_info  s    fk***<#  ...7 	DF$2J$J$J/0w/CD$V,,,	D 	D$J$Jr_   c                     t           j        S r\   )rR   rj  r   s    r^   rj  zGraphLowering.fake_mode  s
    {r_   buffer_namec           	      8   || j         v r| j         |         S || j        v r| j        |         S || j        v rdt          j        j        |         }t          j        |t          j        |j        |j	        gt          j        
                    |          R            S d S r\   )r  r   r	  rR   r*  r-   ConstantBufferr9   r   r`   r   )r   r  datas      r^   try_get_bufferzGraphLowering.try_get_buffer  s     $---&{33$+++$[11$.((7$[1D$K./g.J.J4.P.P     tr_   c                 X    |                      |          }||S t          d|           )Nz$Failed to find buffer matching name )r  r   )r   r  bufs      r^   
get_bufferzGraphLowering.get_buffer  s6    !!+..?JO+OOPPPr_   c                 z   || j         v r| j         |         j        S || j        v r| j        |                                         S || j        v r| j        |                                         S t          j        d|          }|r(|                     |                    d                    S t          d|           )Nz1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r+   could not find )	r	  r`   r  	get_dtyper   rematchgroupKeyError)r   r  r   s      r^   r  zGraphLowering.get_dtype  s    $.((>+.44$---&{3==???$+++$[1;;===HI;WW 	.>>!''!**---666777r_   c                 h   ddl m} || j        v r| j        |                                         S || j        v rB| j        |         }t          t          |dd           |          rdS |                                S || j        v r| j        |                                         S t          d|           )Nr+   )MultiOutputLayoutlayoutr  )
r-   r  r	  numelr  rz   r   	get_numelr   r  )r   r  r  r  s       r^   r  zGraphLowering.get_numel  s    ))))))$.((>+.44666$---%k2C'#x668IJJ q==??"$+++$[1;;===666777r_   rX   c                     t          d          5   t                      j        | cd d d            S # 1 swxY w Y   d S )NzGraphLowering.run)r   r   run)r   rX   r:  s     r^   r  zGraphLowering.run
  s    -.. 	& 	&577;%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   488r   c                    |j         J d|             t          |t          j                  sJ |                     dt          | j                             }| j                            |           || j        |<   ||_         |S )NzOperation registered twice: r   )	operation_namerz   r-   	Operationr  r   r  appendr  )r   r   r   s      r^   register_operationz GraphLowering.register_operation  s     ((*M*M*M((("bl+++++  !<c$/&:&:!<!<==r""" " r_   set_namebufferr  c                   |                      dt          | j                             }| j                            |           || j        |<   t          |t          j                  r|                                s;|	                                '| 
                    |	                                           |r||_        |S )Nr  )r  r   r  r  r  rz   r-   ComputedBufferis_zero_elements
get_devicer  r   )r   r  r  r   s       r^   register_bufferzGraphLowering.register_buffer  s      !:s4<'8'8!:!:;;F###$*D! FB$566	6;A;R;R;T;T	6 !!##/  !2!2!4!4555 	FKr_   operation_namesc                 p    |                      dd                    |          z             }|| j        |<   |S )Nlist_r   )r  r   r  )r   r  r   s      r^   register_operation_listz%GraphLowering.register_operation_list&  s7      388O+D+D!DEE*
4r_   node_outputc                      dt           t          t          j                 t          j        f         dd f fd |           d S )NvaluerZ   c                    t          | t          t          f          r| D ]} |           t          | t          j                  r7|                                 D ]$}j        |                             |            #d S d S r\   )rz   r  tupler-   r?   get_read_namesr  r  )r  x	read_nameregisterr   s      r^   r  z1GraphLowering.register_users_of.<locals>.register.  s    %$//      AHQKKKK%.. @!&!5!5!7!7 @ @I&y188????@ @@ @r_   )r   r
   r-   IRNode)r   r  r  s   ` @r^   register_users_ofzGraphLowering.register_users_of+  si    	@E(29"5ry"@A 	@d 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	r_   c                     t          |t                    sJ | j                            |           || j        vrdS | j        |         D ]}|                                 dS )z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)rz   strr  rq   r  realize)r   r   r  s      r^   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutated8  st    
 $$$$$$  &&&t)))F&t, 	 	DLLNNNN	 	r_   c                     || j         v r	|| j        v sJ d|z               t          | j         |                   }|| j        j        v r| j        j        |         n| j        |         S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )r3  r	  rM   r2  r   )r   r   	orig_names      r^   get_original_value_of_constantz,GraphLowering.get_original_value_of_constantF  s     t3338N8N8N2T9 9O8N8N 5T5QRV5WXX	 DK,,, KY''%	
r_   r  c                 p   |}t           j        j        s| j                                        D ]\  }}|j        s|                                |                                k    r|                                |                                k    r|j        |j        k    r|j	        |j	        k    r||
                                                                |
                                                                k    r.|                                |                                k    r|c S |dt          | j                   }|J |d                                         rd| }|                     |          }t!          j        dd|          }|}d}|| j        v r| d| }|dz  }|| j        v || j        |<   |j	        d|j        dt%          |                                          dt%          |                                          dt'          |          d	| j        |<   || j        |<   |S )	Nconstantr   	constant_z[^a-zA-Z0-9_]r   r+    r  )r,   aot_inductoruse_runtime_constant_foldingr	  items	is_mkldnnr   r   r`   r   untyped_storagedata_ptrstorage_offsetr   isdigitr  r  subr  hashr  r3  )r   r   r  r  constant_namer  prefixcnts           r^   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_nameV  sY    	"? 	)(,(<(<(>(> ) )$u
)		uzz||3377
ek11u|33,,..7799,,..7799: :++--1E1E1G1GGG((((<3c$.1133D7?? 	&%t%%D  && (#t44dn$$$$s$$D1HC dn$$  $t{  tz  TYY[[!! &+DKKMM&:&: Dzz  	D!
 .7$T*r_   c                     |                      ||          }t          j        t          j        |t          |j        |j        g|                     |          R                      S r\   )	r  r?   creater-   r  r9   r   r`   r   )r   r  r   new_names       r^   add_tensor_constantz!GraphLowering.add_tensor_constant  si     33D$??DKVd6O6OPT6U6UVVV 
 
 	
r_   device_overridec                 >   | j         |         j        |k    s||S t          j        j                                        5  |                     | d|j         |j        pd | j         |         	                    |                    cddd           S # 1 swxY w Y   dS )z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        Nr   r   )
r	  r   rc   utils_python_dispatch_disable_current_modesr  r  r  to)r   r   r  s      r^   r  zGraphLowering.constant_name  s     >$&/99_=TK[)@@BB 	 	 33LL/.L0E0JLLt$''88 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   ABBBr   rY   c                    t                                          |||          }| j                            |           t	          |t
                    r|j        j        }|| j        |<   |S t	          |t          t          t          f          r t          j        |          }|| j        |<   |S |d S t	          |t                    rd S t	          |t          j                  s
J |            |j        s|                     |          \  }}n|                     |          \  }}|                     |          }t+          j        t/          |t1          |j        |j        ||                              }|| j        |<   |j        j        | j        |<   | j        j        r|                     |j                   tA                      5  tC          |          r| j"        #                    |           d d d            n# 1 swxY w Y   |S r\   )$r   placeholderr   r  rz   r%   r   r   r   intrk   rf  r{   sympifyr   rc   r   _has_symbolic_sizes_stridesr   r   r  r?   r  r;   r9   r   r`   r  r  r  r  r  rO   rP   r7  rq   )
r   r   rX   rY   exampler   sizesstridestensorr:  s
            r^   r  zGraphLowering.placeholder  s@    ''%%fdF;;%%f---gx(( 		<$D(,Df%K#tU!344 	=))D(,Df%K_4g}-- 	 4'5<0099'999
 2 	B!66w??NE77!88AANE7""6**!GNGM5'JJ 
 
 %+&!-3[-="6*" 	1  000 122 	0 	0*733 0#''///	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 s   *G>>HHc                 D   |t           j        u rKt          |d         t          t          t
          f          r#t                                          |||          S t          |t          j	        j
                  st          |d          r ||i |S |t          vrt          |t          j	        j                  sJ | d            |                                                    d          d         }|t           v rt#          |           nt$          j        r^t)          |g          rt*          nt,          }t.                              d|                    |||                     t#          |           n2t)          |g          rt+          |||          t-          |||          	 t.                              dt          |                    t          |         |i |}|S # t6          $ r0}t9          ||||                              |j                  d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   z"Creating implicit fallback for:
%sz  via %s)operatorgetitemrz   r  r  dictr   r   rc   r   r   r   rD   r   r   r   rA   rE   r,   implicit_fallbacksr   r6   r7   rc  r  operator_strrd  	Exceptionr5   with_traceback__traceback__)	r   r   rX   rY   	base_nameerrorouter:  s	           r^   r   zGraphLowering.call_function  sG   X%%%*T!WtUD>Q*R*R%77((v>>> &%*"=>> 	+71D
 D
 	+ 64*6***""
-  0 0///0 0 0 ++C003I///f%%%%* I *6(336--5 
 9&&vtV<<   f%%%%#VH-- I 0fEEE264HHH	IIj)F"3444F#T4V44CJ 	 	 	#AvtV<<KK 	s   +9G% %
H/+HHtc                 T    t          | j                  dk    o| j        d         dk    S )zM
        True if this is a small constant attr that will be inlined.
        r+   r      )r   shape)r  s    r^   can_inline_constantz!GraphLowering.can_inline_constant  s&    
 17||q 4QWQZ1_4r_   r]   c                    t          | j        |          }t          |t          j        j                  rt          j        ||          S t          |t          j        j	                  r$|| j
        |<   d| j        |<   t          ||          S t          |t          j                  sJ t          j        j        st          j        st%          |          r|                     ||          S t)                      5  |j        dk    r9t-          |                                |j        |j                  cd d d            S |                     |          rdt6                              dt;          |                     ddlm}  ||                                 |j        |j                  cd d d            S 	 d d d            n# 1 swxY w Y   |                     ||          S )N)r   graph_moduler   r]   zInlining constant: %s r+   )r  )r`   r   )!r   r2  rz   rc   r   r   r-   Subgraph_CScriptObjectr
  r  r@   r   r,   r  r  always_keep_tensor_constantsrH   r  r(   r  r8   itemr`   r   r  rc  rd  r  loweringr  tolist)r   r   rX   rY   r  r  s         r^   get_attrzGraphLowering.get_attr  sZ    "$+v66eUX122 	@;F????eUX233 	2/4D$V,*,D'"65111%.....<	;2	; )//	;
 ++E6:::]] 	V 	V{b  

ek5<HH	V 	V 	V 	V 	V 	V 	V 	V ''.. V		2CKK@@@,,,,,,vellnnEKUUU	V 	V 	V 	V 	V 	V 	V 	VV	V 	V 	V 	V 	V 	V 	V 	V 	V 	V 	V 	V 	V 	V 	V ''v666s   :8G?A,GG	G	c                     t           r\   AssertionErrorr   r   rX   rY   s       r^   call_modulezGraphLowering.call_module-      r_   c                     t           r\   r  r  s       r^   call_methodzGraphLowering.call_method0  r  r_   c                    t                                          |||          }t          |t          t          f          s|f}t          |t          t          f          sJ t          |                      t          d |D                       s
J |            t          j        j	        j
        d         }t          |t          t          f          s|f}d |D             }g }t          |          t          |          k    sJ t          ||          D ]\  }}t          |t          j        t          j        f          s|                    |           A|                    |                     ||j        d                                                              || _        | j                                        D ]I\  }	}
t          |
t          t.          j        f          sJ dt          |
                       t          |
t                    sW|
                                 t          |
t                    sJ |
j        }
t          |
t          j                  sJ |
}|
j        }
t          |
t8                    r|
                                |	k    rnt          j                            |
| j         |	                    	 | j        !                    |          }| j         |	         | j        |<   8# tD          $ r Y Ew xY wK| #                                 tH          %                    d| j&        | j'        | j'        nd           d S )Nc              3      K   | ]o}t          |t          t          j        t	          d           t          j        t          j        t          j        j	        j
        t          t          j        f          V  pd S r\   )rz   r?   r-   r8   r  r  r{   r   logicboolalgBooleanr  EffectfulKernelr   r  s     r^   rI  z'GraphLowering.output.<locals>.<genexpr>;  sz       
 
  KJJ%JK'/&	 
 
 
 
 
 
r_   r   c                 L    g | ]!}t           j                            |          "S r]   )r-   ExternKernelrealize_inputr  s     r^   r   z(GraphLowering.output.<locals>.<listcomp>P  s(    CCCq"///22CCCr_   rE  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %drL  )(r   outputrz   r  r  r  rb  rR   r*  r  rX   r   zipr-   r?   BaseViewr  try_match_insignificant_stridesr   r   r   r   r  r{   r   r  r  r>   r;   get_nameMutationLayoutSHOULDREMOVErealize_intor  r  
ValueErrorfinalizerc  rd  r   r   )r   r   rX   rY   resultfx_node_argsresult_correct_stridesrfx_noder   r  value_storage_boxindr:  s                r^   r  zGraphLowering.output3  sx    f55&5$-00 	YF&5$-00>>$v,,>>> 
 
 
 
 
 
 
 	 	 	 	 	" w+03,66 	+(?LCCFCCC!#<  CKK////fl33 
	 
	JAwa",!<== 	&--a0000 '--887<.5577     4,2244 	 	KD%	5:.  G GFeFFG G G eY// MMOOOeY/////JEeR]33333 %JEe[11 
U^^5E5E5M5M-::45d;  ,223DEEC.2.H.ND&s++!   D 6N 			U'!]6DMMB	
 	
 	
 	
 	
s   /K55
LLc                 B    | j         D ]}|                                 d S r\   )r  decide_layout)r   r  s     r^   r'  zGraphLowering.finalize  s2    < 	  	 C	  	 r_   r   c              #   V   K   | j         }	 || _         d V  || _         d S # || _         w xY wr\   )r  )r   r   olds      r^   set_current_nodezGraphLowering.set_current_node  sF      	$ $DEEE #DD####s    	(r  meta_strides_inp.c           	      0    t           j        j                            |          sJ d |D             }t	           fdt          ||                                          D                       r|S dt          t          t          t          f                  dt          t          t          t          f                  dt          t          t          t          f                  dt          f fd} ||                                ||                                          s|S t           j        j                            |          \  }}t          |j                  }t!          |                                          D ]+\  }}	 j                            |	d          r||         ||<   ,t           j        j                            |j        |j        |j        ||j                  }
t          j        t           j        j                            ||
                    S )	a  
        Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
        dimensions - size 0 or 1 - will be updated.

        If there are real stride differences (NHWC vs NCHW) then the input will be returned.
        c                 ^    g | ]*}t          |t          j                  r|j        j        n|+S r]   r   r   ss     r^   r   zAGraphLowering.try_match_insignificant_strides.<locals>.<listcomp>  s>     
 
 
BC:a66=AFKKA
 
 
r_   c              3   T   K   | ]"\  }}j                             ||          V  #d S r\   )r   statically_known_equals)r   s1s2r   s      r^   rI  z@GraphLowering.try_match_insignificant_strides.<locals>.<genexpr>  sM       
 
B M11"b99
 
 
 
 
 
r_   r  meta_stridestensor_stridesrZ   c                     t          | ||          D ]@\  }}}j                            |d          r"j                            ||          s dS AdS )Nr+   FT)r   r   statically_known_leqr:  )r  r=  r>  dimr;  r<  r   s         r^   significant_strides_equalzPGraphLowering.try_match_insignificant_strides.<locals>.significant_strides_equal  sp    
  #5,GG ! !R=55c1== }<<RDD ! 55! 4r_   r+   )rc   rg  r-   is_storage_and_layoutrb  r   
get_strider   r   r   r  rk   get_sizeas_storage_and_layoutr  r   r   r   r@  r9   r   r`   r   offsetr?   ReinterpretView)r   r  r4  r=  rB  storage
old_layout
new_strider   r8  
new_layouts   `          r^   r"  z-GraphLowering.try_match_insignificant_strides  s    !77?????
 
GW
 
 
  
 
 
 
lF,=,=,?,?@@
 
 
 
 
 	 M	E$),-	"5s#34	 %U49%56	 		 	 	 	 	 	 )(OO|V->->-@-@
 
 	 M#o0FFvNN*+,,
foo//00 	0 	0DAq}11!Q77 0 ,Q
1_'33O
 

 |EO.>>w
SSTTTr_   r,  old_args
old_kwargsnew_args
new_kwargsc                 p    t          |j        t          j        j                  sJ t          |          t          |          k    sJ t          |          t          |          k    sJ dt          j        j        dt          j	        dt          j	        ddf fd}|j        j
        }t          t          ||                    D ]"\  }\  }	}
|j        |         } |||	|
           #d |j        D             }|                                D ]'}||         }	||         }
||         } |||	|
           (dS )ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        
schema_argold_argnew_argrZ   Nc                     ||u rd S | j         @| j         j        r6                    t          j        j        j        j        ||fi            d S d S d S r\   )
alias_infois_writer   rc   r@  r   copy_rA  )rR  rS  rT  r   s      r^   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagate  sg     '!!$0Z5J5S0 ""59>#7#?'7ASUWXXXXX 1000r_   c                     i | ]
}|j         |S r]   r}  )r   args     r^   
<dictcomp>z4GraphLowering.propagate_mutation.<locals>.<dictcomp>  s    CCC33CCCr_   )rz   r   rc   r   r   r   r  Argumentr-   r  _schemar   r   	argumentsr  )r   r,  rM  rN  rO  rP  rY  schemarH  rS  rT  rR  schema_kwargskeys   `             r^   propagate_mutationz GraphLowering.propagate_mutation  st     '.%**?@@@@@8}}H----:#j//1111	Y)	Y46I	YHJ		Y	Y 	Y 	Y 	Y 	Y 	Y ''0Xx1H1H'I'I 	: 	:#C#'7)#.JOJ9999CC&2BCCC??$$ 	: 	:C oG oG&s+JOJ9999		: 	:r_   rB  c                 2   ()* dt           dd ffd}t           j                  )t           j                  *h}j        dk    }|r+                               \  }}|t          ||          z  }t          j        	                    |          5   
                              5  t          j
                  5  j        dk    rKj        t          j        ur8t                    r) |d            t!          j        d          |i |}n6j        dk    rjt#          j                  x}rT |d           |}	|}
 |g|R i |\  }}                     j        ||          }                     |	|
||           nt)          j                  r |d	           t+          j        d
         t.          j        t.          j        t.          j        f          rj        d
         j        j        }nNt;                                                    }n, |d           t;                                                    }t.          j        j         j!        j"        t.          j        j         j#        j"        t.          j        j         j$        j"        t.          j        j         j%        j"        t.          j        j         j&        j"        g(tO          d j(        D                       }tO          (fdj(        D                       }j        )                    dd          rt+          |tT                    r|+                                 j        d
         ,                                }t/          j-        j.        j/        | }ta          |d          r|1                                |k    r6|s4t          j2        |          }t          j3        4                    ||          }|rHt+          |tT                    r3t+          |j5        t          j6                  r|+                                 |s|rt+          j        d
         t.          j7                  rƉj        d
         ,                                }t          |          rtp          j9        pj:         j;        vo| }t.          j<        =                    j        d
                   }t          t}          |                    dk    }|s||rzt          |?                                          dk    rU j@        v rLj:         j;        vr>|s<t          jA        B                    |?                                t.          jC                  }|st          |          rj        d
         D                                st+          |j5        t          j6                  r5t          j3        4                    |t          j2        |          |          }n.d |D             }t          j3        E                    |||          }t          t          j(                            }|dk    rt+          |tT                    rj(        D ]Q}|j        t          v r|H                                 t.          j        j         jI        j"        t.          j        j         jJ        j"        t.          j        j         jK        j"        g}g } jL        s.|M                    t.          j        j         jN        j"                   t.          jO        jP        r|t.          j        jQ        jR        j"        t.          j        jQ        jR        jS        t.          j        j         jT        j"        t.          j        jU        jV        j"        t.          j        jU        jV        jW        t.          j        jU        jV        jS        t.          j        jU        jV        jX        gz  }|t.          j        jQ        jY        j"        t.          j        jQ        jY        jS        t.          j        jQ        jZ        jS        t.          j        jQ        j[        j"        t.          j        jU        j\        j"        t.          j        jU        j\        jS        gz  }t.          jO        j]        r|t.          j        j^        j_        j"        gz  }|j        |v rQt          j3        4                    |t          j2        j        d
         ,                                          d          }|j        |v r^|j`        d         u rOt          j3        4                    |t          j2        t          j        d
         jb                                      }|j        dk    r:t+          |j5        j5        t          t          f          r|+                                 S|e                    t          j(                             t+          |tT                    r(|f                                r|H                                 t+          |tT                    rct+          |j5        t                    rI|j5        j5        }t+          |t                    r(|h                                r|+                                 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t+          |tT                    rpt+          |j5        t          jg                  rPt+          |j5        j5        t          ji                  r|j5        j5        _j        nt+          |j5        j5        t          jk                  r|j5        j5        _j        t+          |j5        j5        t          jl                  r@t+          |j5        j5        j5        t          ji                  r|j5        j5        j5        _j        nt+          |j5        j5        t          jm                  r\|j5        j5        jn        sKt+          |j5        j5        jo        d         t          jk                  r|j5        j5        jo        d         _j         p                    |           t                      } j        )d          D ]}||q                                z  } j        *d          D ]}||q                                z  }dt           f)* fd}j        dk    r"t          jr        js        jt        }dt          dt           dd f fd}|D ]C} jv        w                    |g           }|jx        |         } |y                                z                    |           spdt          dt          fd}! |!| j|                  r ||| j|        k    | d| j|                     |!| j}                  r ||| j}        k    | d| j}                    |D ]~}"t}          |"j                  }#|# j~        z
  }$|$rEt          |$t                     }% jv                            |%g           M                    |"           g ||"j        |"j                    E xj~        |z  c_~        t          t          jr        js        jt        j        )                    d i                     }&t          d! |&                                D                       }'||'k    s0J d"| d|' d#                                 d$ |                         |S )%NmsgrZ   c                 d    t                               dt          j                  |            d S )Nzlowering %s %s)rc  rd  r   format_node)re  rB  s    r^   rd  z%GraphLowering.run_node.<locals>.debug  s*    II&
1=(A(A3GGGGGr_   r   rB   F)add_to_fallback_setlayout_constraintsr   rE  r   c              3   ,   K   | ]}|j         d k    V  dS )r  N)r   )r   r  s     r^   rI  z)GraphLowering.run_node.<locals>.<genexpr>7  s)      DDDDGx/DDDDDDr_   c              3   *   K   | ]}|j         v V  d S r\   )r   )r   r  as_strided_opss     r^   rI  z)GraphLowering.run_node.<locals>.<genexpr>8  s;       * *26~-* * * * * *r_   inductor_realize_to_stridesrD  r      )allow_paddingc                 ^    g | ]*}t          |t          j                  r|j        j        n|+S r]   r   r7  s     r^   r   z*GraphLowering.run_node.<locals>.<listcomp>w  sB     ' ' '$% 0:!U\/J/J QPQ' ' 'r_   r+   Tr  c                  2   g } j         d          D ]0}|                     d|                                 d| d           1j        d          D ]0}|                     d|                                 d| d           1d                    |           S )Nzunbacked_symbol_defs=z in:

z***
)r  r  get_unbacked_symbol_defsr  r   )r+  r  r   buffer_watermarkoperation_watermarkr   s      r^   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defs  s    A|$4$5$56  YC,H,H,J,JYYRUYYY    o&9&:&:;  WB,G,G,I,IWWQSWWW    <<??"r_   r  r   c                     t          j        | |          }                    |d                               |           d S )NTr  )r-   AssertScalarr  r  )r   re  	assert_opr   s      r^   make_assertz+GraphLowering.run_node.<locals>.make_assert  sG    OD#66	$$Y$>>>''	22222r_   r8  c                 n    | t           t            fv rdS 	 t          |            dS # t          $ r Y dS w xY w)NFT)r*   r  	TypeError)r8  s    r^   is_convertiblez.GraphLowering.run_node.<locals>.is_convertible"  sP    & 111#(5)FFF#'4( ) ) )#(55)s   & 
44z >= z <= )rb  unbacked_bindingsc              3   j   K   | ].}t           j        j        j                            ||          V  /d S r\   )rR   rj  r   unbacked_renamingsr   r7  s     r^   rI  z)GraphLowering.run_node.<locals>.<genexpr>L  sN       3 3 %8<<QBB3 3 3 3 3 3r_   zfailed z (inductor >= fx)
fx node is: z
new operations are:

)r  r   r  r  r   fetch_args_kwargs_from_envrL   r-   r  current_originsr3  rR   r   r  r  rC   rB   rF   r   rc  r   rz   r   rc   r   SymFloatSymBoolr   r   r   run_noder@  r   
as_stridedrA  as_strided_as_strided_scatterresize	resize_asr   r  r   r?   r  r   rg  r  any_is_symbolicr   rD  get_stride_orderr  require_stride_orderr  r!  r   r,   r   r   r   _prims_commonis_non_overlapping_and_denser    rE  r(  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesr)   rG   realize_hintr   mm_int_mmr   r  r   r  _has_mkldnnr_  _linear_pointwisebinarymkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensor_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrX   r   r  r<   r=   
mark_reusehas_exceeded_max_readsr>   has_large_inner_fnLoopsorigin_nodeBufferr  MultiOutputindicesinputsr  rs  r*  r   r   r   r   popvar_to_range _default_unspecified_value_rangeissubsetrk   lowerupperr   r   
setdefaultr"   r  rg  ),r   rB  rd  originsis_call_functionrX   rY   r(  ri  rM  rN  	is_outputis_input_for_as_stridedr  sym_stridesstride_orderro  denseunbacked_symbols_in_strides	num_usersr  need_fixed_layoutneed_fixed_channels_last_layoutcurrnew_unbacked_defsr  r   rv  r   rz  i0rasvrr}  rafvsmissingi1r~  renamed_unbacked_bindingsrl  rt  ru  r:  s,   ``                                      @@@r^   r  zGraphLowering.run_node  s   	Hs 	Ht 	H 	H 	H 	H 	H 	H t|,,!$/22#4?2 	4::1==LD&~dF333GY&&w// Q	) Q	)1F1F2
 2
 Q	) Q	)
 
Q	) Q	) ''HH$4449!<< 5 ()))N)!(NNN#  ((&>qx&H&HH" ) *+++#
11!EdEEEfEEf++AHdFCC ''8ZvNNNN ** - '(((F5MEL%.%-#P  1 VE]/4FF"WW--a00FFb			))!,, 	)1	*2	19	%-	(0N DDAGDDDDDI&) * * * *:;'* * * ' '# vzz7?? XJ	E E X    &-..00#o3CWM55X((**g55' 6 $&#6w#?#?L_AA&,WWF!vy11! v{BK88!     ,4 ,*uu|; ; , &-..00w<< (*UafD<U.U%655 " "/LLu E 1'::;;a? 0 8
!
   1 122a77!@@@F$*CCC 7 D #%"3"T"T"OO--u/B# # 7 3w<<  6%=1133 z"K8 8  &(_%I%I & " 3G < <.; &J & &FF' ')0' ' 'G &(_%J%J &} &K & &F Jqw//00I1}}FI!>!>}G ;- ;-D{&;;;++--- "IN?G!IN-5!IN2:-)
 ;=7# Y-44UY^5O5WXXX 8/ Y- %	 0 B J %	 0 B I %	 ? G %	 0 B J %	 0 B I %	 0 B I %	 0 B P2 - < %	 0 G O %	 0 G N %	 0 H O %	 0 Q Y %	 0 B J %	 0 B I@ ;  %x/ Y 1eim6O6W5X X 1;*;;;%'_%I%I & " 3AF5M4H4H4J4J K K.2 &J & &F !K+JJJ !TYq\ 1 1%'_%I%I & " 3$B16%=CV$W$W!" !"& &F w(**%fk&6I8NOO -"NN,,, !!#ag,,/// &),, &1N1N1P1P & ##%%% &),, )FK1T1T ){'dI.. )..00 )(((cQ	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	) Q	)v fi(( 	CZR]-S-S 	C&+*BH55 C/0 ,,FK,bi88 C/0 ,fk.0ABB 
CzK$)28H H 
C 9:FK$)55 v{/@@C"K,4C "&+"2"9!"<biHH CAB(/2>v&&&6@ll< 0 1 12 	@ 	@C!=!=!?!??/"5"6"67 	? 	?B!<!<!>!>>
	# 
	# 
	# 
	# 
	# 
	# 
	# 
	# 
	# 4=  * (2I3$ 3S 3T 3 3 3 3 3 3
 ( ; ;(,,R44+B/ AACCLLRPP K)$ )4 ) ) ) ) &~bh// K#B"(Nr4I4Irx4I4IJJJ%~bh// K#B"(Nr4I4Irx4I4IJJJ ; ;B/88C!D$??G ; c222*55b"==DDRHHHH#BG\::::; ''+<<'' 9 *AFJJ7JB,O,O! !" )3 3 3*//113 3 3 ) )% %(AAAA>+ > >1J > > }}> >*9/*;*;> > BAA s[   k.j=gj&j=&j**j=-j*.j=1k=k	kk	kkkc                    t           j        rt          d          t          j        dvrt          dt          j                   | j                                        D ]}d }t          |t                    r|	                                }nJt          |t          j        t          j        t          j        j        j        f          rt!          |          }t#          || j                  st          d|           d S )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform zUnsupported input dtype )r,   disable_cpp_codegenr4   sysplatformr   rl  rz   r?   r  r{   r|   r   r}   r~   r   r   rw   ra   )r   r  r`   s      r^   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapperX  s   % 	D()BCCC<;;;()O)O)OPPP&--// 
	Q 
	QEE%++ =))ej%*2D2LM  = 6e<<1%CC Q,-O-O-OPPPQ
	Q 
	Qr_   c                    d| j         v | _        | j        r|                                  | j                                         }|                    d           |                    d           t          |          dk    s0J d                    d                    |                                t          |          dk    }|rdn|	                                }t          |          | _        t          || j                  }|J d| d	             |            | _        | j        r8| j        j        j        | j        _        | j        j        j        | j        _        d S d S )
Nra   rF  r   r+   zDoes not support mixing {}+r   zDevice z not supported)r  ra   r   r  r   discardr   formatr   r  r1   r  r2   r  r   _names_itersrc_to_kernel)r   r  only_cpudevice_typewrapper_code_gen_clss        r^   init_wrapper_codezGraphLowering.init_wrapper_codek  s{   d//	 	522444(--//U###V$$$<  A%%%'C'J'JHH\""(
 (
%%% |$$)'?ee\-=-=-?-?1+>>=) 
  
 $//1V;1V1V1V///0022 	 -1,=,J,VD)!.< +++		 	r_   c                    	 d j         v r(d _        t          j        ddi          5                                   j        }ddd           n# 1 swxY w Y   t          j        j        sdt          t          j
        t          j        t          j        f         dt          t          t          t          j        f         fdt          j        j                                        }||t%          t&          j        t*                    s]|j        r|j                                         d	 |j        D             }fd
t3          j        |t&          j                  D             	n>fdt%          t&          j        t*                    r j        nt&          j        D             	 j        r_ddlm} 	 fdt?           j                   D             }|D ]5}	|         }t%          |t          j                  sJ  ||          	|<   ~6t          j!        j"        #                                5   |	           ddd           n# 1 swxY w Y   	d _         j$                                          j%                                          j&                                         t&          j'        j(        j)                                         t&          j'        j(        j*                                         t          j        ddi          5   +                                cddd           S # 1 swxY w Y   dS  +                                S )ad  
        For CPU, the cpp wrapper codegen is done in one pass.
        For GPU, the cpp wrapper codegen is done in two steps: JIT-compile the model with python
        wrapper code and run it to generate autotuned kernel binaries in the first pass; and then
        generate cpp wrapper code and compile it to a dynamic library in the second pass.
        ra   Fztriton.store_cubinTNr  rZ   c                 <   | d S t          | t          j        t          j        f          r| j        j        S t          | t                    rt          |           S t          | t          j                  s'J dt          t          |                     z               | S )Nz&Unknown type when creating real inputs)rz   rc   r   r  r   hintr   r   r   r  r  )r  s    r^   materializez;GraphLowering.codegen_with_cpp_wrapper.<locals>.materialize  s     y#t#Aen'EFF 	! v{*#Az22 !%ayy()u|    S SCc$q''llRS S S  !r_   c                     g | ]}||S r\   r]   )r   params     r^   r   z:GraphLowering.codegen_with_cpp_wrapper.<locals>.<listcomp>  s(     # # #! , ,,,r_   c                 &    g | ]} |          S r]   r]   r   r  r  s     r^   r   z:GraphLowering.codegen_with_cpp_wrapper.<locals>.<listcomp>  s1     # # # $A# # #r_   c                 &    g | ]} |          S r]   r]   r  s     r^   r   z:GraphLowering.codegen_with_cpp_wrapper.<locals>.<listcomp>  s1     # # # $A# # #r_   r+   )clone_preserve_stridesc                 l    g | ]0\  }}|j         v t          |         t          j                  .|1S r]   )r  rz   rc   r   )r   rH  r   real_inputsr   s      r^   r   z:GraphLowering.codegen_with_cpp_wrapper.<locals>.<listcomp>  sN     * * *%C4#666&{3'7FF 7 666r_   ztriton.autotune_at_compile_time),r  r   r,   patchcompile_to_modulecalltritonautotune_at_compile_timer   rc   r   r  r   r  rf  _guardsTracingContexttry_getrz   rR   r  rQ   output_stridesclearparams_flat	itertoolschainr   r  
compile_fxr  r   r   r  r  r  r  r  r  r*  r   precomputed_replacementsinv_precomputed_replacementscodegen)
r   compiledtracing_contextr  r  r  rH  mutated_inpr  r  s
   `       @@r^   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrapper  s!    T&&&$D 3T:;; 9 9113389 9 9 9 9 9 9 9 9 9 9 9 9 9 9 =9 G !U\5>5<GH!3u|34! ! ! !  #(-">"F"F"H"H".zM;8 8. '5 ?'6<<>>># #%4%@# # #K
# # # #!*am!L!L# # #KK# # # #  *!-EE/D//!"# # #K & (BBBBBB* * * * *)243D)E)E* * *&  2 ( ( '2#&6)+u|DDDDD+A+A++N+NC('K[1HHJJ * *H[)))* * * * * * * * * * * * * * *  $D &&(((#))+++#))+++G5;;===G9??AAA@%HII & &||~~& & & & & & & & & & & & & & & & & & <<>>!s5   AAA,IIIL))L-0L-c                    ddl m} |                                   || j                  | _         t          j                            | j        | j         j                   | j	        
                    |            | j                                          t                              dt          j        j                   | j	                            | j                  }| j	                                         |S )Nr+   	SchedulerzFFinished codegen for all nodes. The list of kernel names available: %s)r&  r   r  r  rR   rd  draw_orig_fx_graphr1  r   r  push_codegened_graphr  rc  r*  r9  generater   pop_codegened_graph)r   r   r(  s      r^   r  zGraphLowering.codegen  s    ((((((   "4?33	""4<1EFFF..t444   		TG,	
 	
 	

 "++D,=>>--///r_   parent_graphc                     ddl m} |j        | _        |j        | _        |j        | _         || j                  | _         | j                                          dS )a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kerenls). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        r+   r  N)r&  r   r  r  r   r  r  )r   r  r   s      r^   codegen_subgraphzGraphLowering.codegen_subgraph  sc     	)((((((5&1'3"4?33     r_   c                     d}g }g }| j         j        D ]^}|                                }||z  }|                    ||dz  f           |                    ||                                f           _|||fS )Nr   rn  )r&  r   get_read_write_buffers_sizesr  get_estimated_runtime)r   total_bytesnode_countsnode_runtimesr   	num_bytess         r^   count_byteszGraphLowering.count_bytes  s    
 N( 	G 	GD99;;I9$Ki1n5666  $(B(B(D(D!EFFFFK66r_   codec                     d S r\   r]   r  s    r^   save_output_codezGraphLowering.save_output_code&  s	     	r_   c                     t          ddd          5  |                                 cd d d            S # 1 swxY w Y   d S )NzGraphLowering.compile_to_modulecode_genF)
phase_namefwd_only)r   _compile_to_moduler  s    r^   r  zGraphLowering.compile_to_module+  s    -*u
 
 
 	- 	- **,,	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-s   488c                    ddl m} | j        r|                                 n|                                 \  }t
                                         t          j        d           	 d |D             }|	                              \  }t          dfdfd           n## t          $ r t          dfd	            w xY w|                    ||i | j        | j        
          }|| _        | _        || _        |j        J t'          |j                   t(                              d|j                   t          j        d|j                   t,          j        r#t1          d|j         t2          j                   t6          j                            |j                   t6          j                            t<          j                             |j                  d         dz              |S )Nr+   )PyCodeCacheOutput code: 
%sc                 &    g | ]\  }}||j         fS r]   )stack_trace)r   line_nor   s      r^   r   z4GraphLowering._compile_to_module.<locals>.<listcomp>;  s$    PPP}w!12PPPr_   inductor_output_codec                      d iS )Nfilenamer]   )paths   r^   <lambda>z2GraphLowering._compile_to_module.<locals>.<lambda>G  s    T* r_   c                       S r\   r]   r  s   r^   r#  z2GraphLowering._compile_to_module.<locals>.<lambda>H      4 r_   )
payload_fnc                       S r\   r]   r  s   r^   r#  z2GraphLowering._compile_to_module.<locals>.<lambda>A  r%  r_   )linemapattrszOutput code written to: %szCompiled module path: )filer   z.debug)!	codecacher  r   r  r  r   r  rU   rd  writer   r  load_by_key_pathr	  r
  r+  r,  r-  __file__rW   rc  r  r,   benchmark_kernelprintr  stderrrR   output_coder   osr"  splitext)r   r  r(  rb  modr  r"  s        @@r^   r  z GraphLowering._compile_to_module1  s.   ****** 04/?SD))+++T\\^^ 	g 	&&t,,,14888	PPPPPG#))$//IC &****'<<<      	 	 	&'<<<   
 	 **@T^@t'?@	 + 
 
 $ |'''%%%		.===93<HHH" 	L93<99
KKKK	CL)))	RW%%cl33A6ABBB
s   +$B( ( Cc                 t   | j         rddlm} | j        s
J d            |                                 \  }}t          j        d|           d }| j        r/|                     | j                  }t          j        d|           |	                    | ||| j
                  S |                                 j        S )Nr+   )AotCodeCompilerz"AOT mode only supports C++ wrapperr  z#Serialized Extern Kernel Nodes: 
%s)ra   )r   r+  r7  r   r  rU   rd  r  r   compilera   r  r  )r   r7  r  r(  serialized_extern_kernel_nodess        r^   compile_to_fnzGraphLowering.compile_to_fnb  s    = 	1222222#II%IIII 99;;MD'!"5t<<<-1*' 151L1L,2 2.  %:2   #**d: +    ))++00r_   c                 $    d | j         D             S )Nc                     g | ]J}t          |t          j                  t          |t          j                  6|                                KS r]   )rz   r-   NoneAsConstantBufferShapeAsConstantBufferr#  )r   r   s     r^   r   z2GraphLowering.get_output_names.<locals>.<listcomp>|  sZ     
 
 
dB$;<<
 tR%=>>	
MMOO
 
 
r_   )r   r  s    r^   get_output_nameszGraphLowering.get_output_names{  s&    
 
*
 
 
 	
r_   c                     || j                                         v oJ| j         |                                         dk    o'| j         |                                         j        dk    p|| j        v S )Nr+   rF  )r   r  r  r  r  r  r~  s     r^   is_unspec_argzGraphLowering.is_unspec_arg  sy     D%**,,, C!$'1133q8C!$'22449UB3 T22		3r_   )NNNFFNNNFFNNNNr\   )rZ   N)r  r   rZ   N)d__name__
__module____qualname__r   r-   r  __annotations__rc   r   r   r   r  r   r   r{   r   r   r   r   r$   rk   r	   r  r   ExternKernelNoder   r   rg  r   r.   r=  staticmethodr   r  r  r)   r'   r'  r  r  property_subclassesfake_tensorFakeTensorModerj  r?   r  r  r  r`   r  r  r  r  r  r  r  r
   r  r  r  r  r  r  objectr  r   r  r8   r  r@   r  r   r  r  r  r'  r   r3  r!  r   r"  rc  r  r  r  r  r  r  rI   rf  r  r  r   r  r  r:  r?  rA  __classcell__)r:  s   @r^   r   r     s8        	?"""#,#	uT#YT
*+U49d4j3H-II	J# # # #J,	tEJej!11	2    8<(,"&!:>%) "$7;$(26"%WF WFH WF !el!34WF H%	WF
 3-WF WF WF 'tCI7WF TNWF !)d2./0#56!
WF WF WF %T#s(^4WF  SM!WF" /#WF$ sm%WF& 
'WF WF WF WF WF WFrMEO.5v=>MIWM	M M M M ak aD aT a a a \aF     
H 
 U\*
 	

 

 
 
 
20D1A 0 0 0 0dA# A$ A A A A
Del Dt D D D D 5,8G    X	%bi/0	1   $Qc QeBL")4K.L Q Q Q Q
8S 
8U[ 
8 
8 
8 
88S 8U39-= 8 8 8 8& & & & & & & &R\ c     FK   bi d s    tCy S    
 ")!4bi!?@	        
3 
5< 
 
 
 
 'SM').v'	' ' ' 'T 37	
 	
	
"*3-	
		
 	
 	
 	
# 8N SV     99!&v98<S&[8I9	tY$	%9 9 9 9 9 9v-H -C -c3h -TW - - - - - -^ 5u| 5 5 5 5 \5 7 7!&r 748f4E 7	xBK@	A 7  7  7  7D# S # (    # S # (    L
L
!&vL
8<S&[8IL
	L
 L
 L
 L
 L
 L
\        $UX] $ $ $ ^$9UblBK/09U  c5<&7 8# =>9U 
r|R[(	)	9U 9U 9U 9Uv):): *): cN	):
 *): cN): 
): ): ): ):Vb%(- bF b b b b b bHQ Q Q Q&   <d"%T%T	:J5K0K*L d" d" d" d"LsDsDy)9$::;    (! ! ! !&7	T%)3./0$u=NPU=U7V2WW
7 7 7 7  s t    \-: - - - -/J / / / /b1s 1 1 1 12
$s) 
 
 
 
3# 3$ 3 3 3 3 3 3 3 3r_   r   )r4  r  loggingr  r3  r  r  r  collectionsr   
contextlibr   typesr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r{   r   rc   torch._loggingtorch.fxr   r   torch._decompr   torch._dynamo.utilsr   r   r   r   torch._prims_commonr   torch._subclasses.fake_tensorr   r   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr    r!   r"   r#   r$   r%   torch.fx.graphr&   torch.fx.noder'   torch.utils._mode_utilsr(   torch.utils._ordered_setr)   torch.utils._sympy.numbersr*   r   r,   r-   codegen.commonr.   r/   r0   r1   r2   r3   excr4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   r
  rA   rB   rC   rD   rE   rF   rG   rH   r&  rI   r   rJ   r  rK   rL   rM   rN   rO   rP   virtualizedrQ   rR   torch._higher_order_ops.effectsrS   codegen.wrapperrT   torch._inductor.codecacherU   	getLoggerrB  rc  _logginggetArtifactLoggerr  r@  r   countr$  r  torch._inductor.fb.utilsrW   rk   rw   r`   r   r   r  r  r  r   r   r   Interpreterr   r]   r_   r^   <module>rm     s             				 				 



  # # # # # # % % % % % %                                                               , , , , , , 4 4 4 4 4 4 4 4 7 7 7 7 7 7 7 7 > > > > > > 4 4 4 4 4 4             ? ? ? ? ? ? L L L L L L L L                !                 / / / / / / / / / / / / - - - - - -                                  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ) ( ( ( ( ( & & & & & &                ( ' ' ' ' ' ' '  0;;;;;;////// 5 5 5 5 5 5 g!!00<HHy~*9?,, 6 8888888s c d    $%, $d $t $ $ $ $25: (5;BW    $     
	!
658(+56   B/B/$,T#t)_$=B/	B/ B/ B/ B/JH3 H3 H3 H3 H3EH( H3 H3 H3 H3 H3r_   