
    קg[                    	   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z'm(Z( d dlm)Z) d dl*Z*d dl+Z+dd	gZ, ej-        d          d
             Z.d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z<m=Z=m>Z>m?Z?m@Z@ d dlAmBZBmCZC d dlDmEZEmFZF ddlGmHZH ddlImJZK ej        dk    ZL e
jM        eN          ZO e#d          ZPee*jQ        e*jQ        f         ZRe$e+jS        eTf         ZUdZVdZWdZXeXeXdz
  z  d k    reXdk    s
J d            d ZYdd ZZ G d! d"e*j[                  Z\ddd)Z] ej-        d          dd+            Z^dd/Z_d0 Z`d1 Zadd5Zbd d9ZJd: Zcdd>ZdddAZeddDZf	 dddGZgdH ZhdddJZi	 dd	dOZj	 d
ddSZkddWZlddZZmdd[Znd\ Zod] Zp e(d^          Zq e#d_d`a          Zr G db dce eeqerf                   ZsddfZtdg Zudh Zvdi Zw	 dddmZxdn ZyddpZzdq Z{ddvZ|dw Z}ddyZ~dd|Zdd~ZddZd Zd ZddZg Zded<   ddZd Zej        dd            ZddZ ej-        d          d             Z G d de          Z G d d          Z G d de          Zej        d             Z G d d          Z ej-        d          dd            ZddZddZddZddZddddZd Z ej-        d          d             Z ej-        d          d             Zd Zd ZddZd Z G d d          Zd Zd Zd Zd Zd Zd Zej        d             ZddZd Zd Zd Zd Zd ZddZej        d             Zd Z ej-        d          d             Z ej-        d          d             Zd Zd Zd ZddZdÄ ZddƄZdǄ Z G dȄ dej                  Zdʄ Zdd˄Zd̄ Zd̈́ Zd΄ Zdτ ZdЄ Z	 ddфZ	 dd҄ZÐddՄZĐd dׄZej         G d؄ d٦                      Zej        dڄ             ZȐd!dۄZɐdd܄Zʐdd݄Zdބ Z̐d"dZd Zΐd#dZϐd$dZd Zd ZҐd%dZӐd&dZԐd'dZՐd(dZ֐d)dZd ZdS (*      )annotationsN)datetime)StringIO)AnyCallableDictGenericIterableList
NamedTupleOptionalProtocolSequenceSetTypeVarUnion
ValuesView)Concatenate	ParamSpec)mockcudaxpuc                     d t           D             } t          |           dk    sJ t          |           dk    rdn|                                 }|S )Nc                `    g | ]+}t          t          |                                          )|,S  )getattrtorchis_available.0xs     Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_inductor/utils.py
<listcomp>z get_gpu_type.<locals>.<listcomp>8   s3    KKK'%*;*;*H*H*J*JK!KKK       r   r   )	GPU_TYPESlenpop)
avail_gpusgpu_types     r"   get_gpu_typer+   6   sT    KKYKKKJz??aZA--vv:>>3C3CHOr$   )get_interface_for_device)detect_fake_mode)
DeviceType)	EventList)GraphTransformObserver)	ShapeProp)CeilDivCleanDivFloorDivIdentityModularIndexing)make_symbolSymT)bound_sympyValueRangesr%   )config)ceildivwin32_T   @      zmust be power of 2c                .    | t           z   dz
  t            z  S )z/Round up to the nearest multiple of ALIGN_BYTESr%   )ALIGN_BYTES)nbytess    r"   _alignrE   b   s    [ 1$44r$   v
sympy.Exprc                   t          | t          j        t          j        f          r't	          t          t          | j                            S t          | t                    p"t          j	        | t                    t          k    S )z:v can be statically proven to be a multiple of ALIGN_BYTES)
isinstancesympyAddMaxallmap_is_alignedargsaligngcdrC   )rF   s    r"   rO   rO   g   s]    !ei+,, -3{AF++,,,aK59Q#<#<#KKr$   c                  0    e Zd ZdZdZdZed             ZdS )rQ   z<Symbolically round up to the nearest multiple of ALIGN_BYTESr%   Tc                    t          |t          t          j        f          rt	          t          |                    S t          |          r|S d S N)rI   intrJ   IntegerrE   rO   )clsvalues     r"   evalz
align.evalt   sN    ec5=122 	&#e**%%%u 	L	 	r$   N)__name__
__module____qualname____doc__nargs
is_integerclassmethodr[   r   r$   r"   rQ   rQ   n   s=        FFEJ  [  r$   rQ      d   fnCallable[[], Any]returnfloatc                z    |              t           j                                         t          j        t	          d          t           j        d          }t           j                            d          }t           j                            d          }|                                 t          d          D ] }|                                  |              !|                                 t           j                                         |	                    |          dz  }t          dt	          ||z                      }t          dt	          ||z                      }	t          |          D ]} |              t           j                            t           j        j        j        g          5 }
t          |	          D ] }|                                  |              !t           j                                         d	d	d	           n# 1 swxY w Y   t                              d
           t                              |
                                                    dd                     t'          d |
                                D                       }t+          |          |	z  dk    rt-          dt+          |          |	          t+          |          |	z  t'          fdt/          |          D                       }|                                 |                                }t                              d           t                              |                    d                     t3          d |D                       dz  |	z  }t                              d|           |S )aR  
    Returns benchmark results by examining torch profiler events.
    This could be more accurate as it doesn't count CPU side overhead.
    However, this also requires manually excluding irrelevant event, e.g.
    vectorized_elementwise_kernel which is used to fill L2 cache,
    various CUDA events, etc, so could also be fragile.
    g    Ar   )dtypedeviceT)enable_timing   r%   )
activitiesNz
raw eventsself_device_time_total)sort_by	row_limitc                R    g | ]$}|j         t          j        k    |j        d k    "|%S )zContext Sync)device_typer.   CUDAnamer    events     r"   r#   z,do_bench_using_profiling.<locals>.<listcomp>   s>     	
 	
 	
 JO33
n8T8T 8T8T8Tr$   r   zYFailed to divide all profiling events into #repeat groups. #CUDA events: %d, #repeats: %sc                ,    g | ]\  }}|z  d k    |S r   r   )r    irx   num_event_per_groups      r"   r#   z,do_bench_using_profiling.<locals>.<listcomp>   s8     	
 	
 	
5&&!++ +++r$   zprofiling time breakdown)rr   c              3  $   K   | ]}|j         V  d S rV   )device_time_totalrw   s     r"   	<genexpr>z+do_bench_using_profiling.<locals>.<genexpr>   s%      AA%e%AAAAAAr$   g     @@zprofiling results: %s ms)r   r   synchronizeemptyrW   Eventrecordrangezero_elapsed_timemaxprofilerprofileProfilerActivityru   logdebugkey_averagestabler/   eventsr'   RuntimeError	enumerate_build_treesum)re   warmuprepcachestart_event	end_event_estimate_msn_warmupn_repeatpr{   filtered_eventsactual_eventsresr|   s                  @r"   do_bench_using_profilingr   |   s    BDDD	JKJuyHHHE *"""66K
  t 44I1XX  
	J**9559K 1c&;.//00H1c#+,,--H 8__  
			N+0
 
  
 
 ! 
x 	 	AKKMMMBDDDD
   ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! IIlIIann$$-EQS$TTUUU	
 	
	
 	
 	
 O ?h&!++-  	
 
 	
 o..9	
 	
 	
 	
%o66	
 	
 	
 M !..00MII()))IIm!!B!//000
AA=AAA
A
AF
JX
UCII(#...Js   7AHHHboolc                    	 ddl m}  t          j                            dd           | d uo(t          t          t          j        dd           d          S # t          $ r Y dS t          $ r}dt          |          v sJ Y d }~dS d }~ww xY w)	Nr   )	roi_alignztorchvision::nmsMetatorchvisionr   Fztorchvision::nms does not exist)torchvision.opsr   r   _C%_dispatch_has_kernel_for_dispatch_keyhasattrr   opsImportErrorr   str)r   es     r"   has_torchvision_roi_alignr      s    ------667I6RRR$ 
EI}d33[*
 *
 	
    uu   0CFF::::uuuuus   AA 
B	"	B	+BB	rk   "Union[Optional[torch.device], str]torch.devicec                :   | t          j        d          j        S t          | t                    rt          j        |           } | j        dvrM| j        Ft          | j                  }t          j        | j        |j        	                                          S | S )Ng        )cpumeta)index)
r   tensorrk   rI   r   typer   r,   Workercurrent_devicerk   device_interfaces     r"   decode_devicer      s    ~|C  ''&# &f%%{/))fl.B3FK@@|FK/?/F/U/U/W/WXXXXMr$   c                f    t          j        t          j        | t	          j        d                    S Nr%   )	functoolsreduceoperatormulrJ   rX   its    r"   sympy_productr      s#    HL"emA.>.>???r$   c           	         t          |           t          |          k    sJ t          j        t          d t	          | |          D                                 S )Nc              3  &   K   | ]\  }}||z  V  d S rV   r   )r    abs      r"   r   zsympy_dot.<locals>.<genexpr>   s*      >>daAE>>>>>>r$   )r'   rJ   expandr   zip)seq1seq2s     r"   	sympy_dotr      sN    t99D		!!!!<>>c$oo>>>>>???r$   r   Iterable[_T]ValuesView[_T]c                >    d | D                                              S )Nc                .    i | ]}t          |          |S r   )idr   s     r"   
<dictcomp>zunique.<locals>.<dictcomp>   s     !!!BqEE1!!!r$   )valuesr   s    r"   uniquer      s"    !!b!!!((***r$   numerUnion[int, sympy.Expr]denomc           
        t          | t          j                  st          |t          j                  r4t          t          j        |           t          j        |                    S t          | t
                    rt          |t
                    s/J |  dt          |            d| dt          |                       t          | |          S )Nz: , )rI   rJ   Exprr2   sympifyrW   r   runtime_ceildiv)r   r   s     r"   r<   r<      s     %$$ C
5%*(E(E Cu}U++U]5-A-ABBB eS!! 9js' ' 9 9
884;;88%884;;889 9 9 5%(((r$   c                d   | dS t          |                               d          d         }i dddddd	d
ddddddd	ddddddddddddddddd d!d"d#d$d%d&}t          |                                          D ]}|||<   t	          | t                     r| n
d'||          S )(Nz*i8.rp   r   i1
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8e4b15x4
fp8e4b15x4float8_e4m3fnfloat8_e5m2float16fp16bfloat16bf16float32fp32float64fp64int8i8int16i16int32i32int64i64uint8u8uint16u16u32u64)uint32uint64*)r   splitlistr   rI   )key	dtype_strtysrF   s       r"   _type_ofr
    sb   
 {uCs##B'Ii 	G 	z	
 	 	 	w 	6 	F 	6 	6 	 	 	 	  	!" 	%#$ '  C, #**,,  AS#&&@33,@I,@,@@r$   lst"Iterable[Union[int, torch.SymInt]]List[sympy.Expr]c                    d | D             S )z
    Gets the shape and stride of a tensor. For non-symbolic tensors, this is
    trivial. But for symbolic tensors, we need to map from SymIntNode into
    sympy.Expr.
    c                6    g | ]}t          j        |          S r   )rJ   r   )r    r{   s     r"   r#   z-convert_shape_to_inductor.<locals>.<listcomp>/  s"    ***EM!***r$   r   )r  s    r"   convert_shape_to_inductorr  '  s     +*c****r$    Iterable[Union[int, sympy.Expr]]List[Union[int, torch.SymInt]]c                ,    ddl m fd| D             S )zz
    Takes a list of shapes from Inductor and converts them into symints (or just
    ints if all shapes are static).
    r%   Vc                    g | ]h}t          |t                    r|nNt          |t          j                  rt          |          n%j        j        j                            |d           iS )N)hint)rI   rW   rJ   rX   graphsizevars	shape_envcreate_symintnode)r    r{   r  s     r"   r#   z+convert_shape_to_symint.<locals>.<listcomp>;  s~        	 a	H a''HSVVVW'99!$9GG  r$   )virtualizedr  )r  r  s    @r"   convert_shape_to_symintr  2  sD            r$   optorch._ops.OpOverloadc                    t          | t          j        j                  sJ t	          d | j        j        D                       S )z-
    Does this op overload have aliasing
    c              3  (   K   | ]}|j         d uV  d S rV   )
alias_infor    r   s     r"   r   zis_view.<locals>.<genexpr>J  s)      FFAq|4'FFFFFFr$   )rI   r   _ops
OpOverloadany_schema	argumentsr  s    r"   is_viewr*  E  sA     b%*/00000FF1EFFFFFFr$   is_pointwise_fn1Optional[Callable[[torch._ops.OpOverload], bool]]c                   | j         dk    sdS t          | j        t          j        j                  s| j        t          j        u sdS | j        t          j        u st          | j                  r t          fd| j
        D                       S t          j        j        | j        j        v pduo | j                  S )z
    Do all uses of this op have torch.Tag.pointwise or return True for optional `is_pointwise_fn`

    Uses in views ops will follow the views uses
    call_functionFc              3  8   K   | ]}t          |          V  d S rV   )is_pointwise_use)r    ur+  s     r"   r   z#is_pointwise_use.<locals>.<genexpr>_  s.      KKA#A77KKKKKKr$   N)r  rI   targetr   r$  r%  r   getitemr*  rM   usersTag	pointwisetags)user+  s    `r"   r0  r0  M  s     6_$$u 	3:uz4559<xGW9W9Wu
zX%%%)<)<%KKKKKKKKKK9#*/1 t#C
(C(Cr$   c                   t           j                                        }g }g }t          |          D ]u\  }}t	          |t           j                  rA|                    |                    d|                      |                    |           `|                    |           vt          d |	                                D                       sJ |
                    | t          |          |          }t          | j        j                  dk    r+t          | j        j        d         j                  dk    r|f}|                    |           t           j                            i |          }	|	|fS )Nargc              3  L   K   | ]}t          |t          j                   V   d S rV   )rI   r   Tensorr   s     r"   r   z$gen_gm_and_inputs.<locals>.<genexpr>p  s1      HH1:a...HHHHHHr$   r%   r   r<  )r   fxGraphr   rI   r<  appendplaceholderrM   r   r.  tupler'   r'  returnsr   r   outputGraphModule)
r2  rP   kwargsgg_argsa_argsnr:  nodegms
             r"   gen_gm_and_inputsrL  f  sM   AFFD//  3c5<(( 	MM!--	a		22333MM#MM#HHHHHHHHHH??65==&99DFN"##q((&q).//8;;wHHTNNN			b!	$	$Bv:r$   r   c                    | dk    rd S t          |           }|                                r|                                 d S d S Nr   )r,   r   r   r   s     r"   r   r   }  sT    /77$$&& '$$&&&&&' 'r$   modelCallable[..., Any]timesrW   c                    t          |           t          j        d           t          j                    }t          |          D ]} | | }t          |           t          j                    }|J ||z
  S )Ni9  )r   r   manual_seedtimeperf_counterr   )rO  example_inputsrQ  rk   t0r   resultt1s           r"   timedrZ    s     	d				B5\\  'F				B7Nr$   r   
         ?c                     t          j         fdt          |          D                       }t          j        |          z  }t	          ||z  d           |S )Nc                4    g | ]}t                    S r   )rZ  )r    r   rP   rk   re   rQ  s     r"   r#   z%print_performance.<locals>.<listcomp>  s'    RRRqE"dE6::RRRr$   z.6f)r   r   r   medianprint)re   rP   rQ  repeatbaselinerk   timingstooks   ```  `  r"   print_performancere    sl     lRRRRRRRE&MMRRRSSG<  5(D	TH_
"
"###Kr$   objr   methodc                `     t          | |                      t          | |fd           dS )zKReplace obj.method() with a new method that returns a precomputed constant.c                      S rV   r   )rX  s   r"   <lambda>z#precompute_method.<locals>.<lambda>  s     r$   N)r   setattr)rf  rg  rX  s     @r"   precompute_methodrl    s8    !WS&!!##FC(((((r$   methods	List[str]c                0    |D ]}t          | |           dS )zFReplace methods with new methods that returns a precomputed constants.N)rl  )rf  rm  rg  s      r"   precompute_methodsrp    s.     ' '#v&&&&' 'r$   c                P    t          | |k              t          | |k               z
  S rV   )rW   )r   r   s     r"   cmprr    s!    q1u::AE

""r$   c                p    t          |           dk    r" t          |           | d         g          |z  S | S )Nr%   r   )r'   r   )r!   sizes     r"   pad_listlikeru    s5    
1vv{{tAww!v%%r$   c                T    t          |           dk    rg S d }t          | |          S )Nr   c                X    t          | t                    r| S |                                 S rV   )rI   r   get_name)elems    r"   	sort_funcztuple_sorted.<locals>.sort_func  s)    dC   	#K ==??"r$   r  )r'   sorted)r!   rz  s     r"   tuple_sortedr}    s:    
1vv{{	# # # !####r$   PRVT)	covariantc                  .    e Zd Zedd            Zdd	Zd
S )CachedMethodrg   Nonec                    d S rV   r   selfs    r"   clear_cachezCachedMethod.clear_cache  s    r$   rP   P.argsrE  P.kwargsr  c                    d S rV   r   r  rP   rE  s      r"   __call__zCachedMethod.__call__  s    r$   Nrg   r  )rP   r  rE  r  rg   r  )r\   r]   r^   staticmethodr  r  r   r$   r"   r  r    sF           \     r$   r  !Callable[Concatenate[Any, P], RV]CachedMethod[P, RV]c                x     d j          dt          j                    fd            }fd}||_        |S )N___cachec                x    t          |           st          |  |                      t          |           S rV   )r   rk  r   )r  re   r  s    r"   wrapperzcache_on_self.<locals>.wrapper  s?    tS!! 	)D#rr$xx(((tS!!!r$   c                L    t          |           rt          |            d S d S rV   )r   delattr)r  r  s    r"   r  z"cache_on_self.<locals>.clear_cache  s5    4 	D#	 	r$   )r\   r   wrapsr  )re   r  r  r  s   `  @r"   cache_on_selfr    sq    
"r{
"
"
"C_R" " " " " "
     &GNr$   c                    ddl m} t          | t                    r6t	          j        t          j        d | D             t                                S t          | |j	                  r| j
        S t                      S )Nr%   irc                T    g | ]%}t          |d           |j        |j        j        &S )rJ  )r   rJ  origins)r    rJ  s     r"   r#   z%aggregate_origins.<locals>.<listcomp>  sI       4(( .2Y	!  r$   ) r  rI   r  r   r   r   or_setExternKernelr  )node_scheduler  s     r"   aggregate_originsr    s    -&& L )  
 EE
 
 	
 
M2?	3	3 $$uur$   c                8   t          |           }|dk    r)d |D             }t          t          |                    }n|dk    rg }|D ]}|j        dk    rsd|j        v rj|j        d         d         }t          |d         t                    r|                    |d                    `|                    |d         j                   t          t          |                    }n|dk    rd	 |D             }nt          |}d

                    dg|z             S )Noriginal_atenc                ~    g | ]:}|j         d k    r-d|j        v r$|j        d         #|j        d         j        j        ;S )r.  r  )r  r   _overloadpacketr\   r    origins     r"   r#   z)get_fused_kernel_name.<locals>.<listcomp>  sX     
 
 
yO++6;..O,8	 K(8A 988r$   r   r.  source_fn_stackrp   r%   inductor_nodec                2    g | ]}|j         d k    |j        S r.  )r  rv   r  s     r"   r#   z)get_fused_kernel_name.<locals>.<listcomp>  s-     
 
 
"VY/5Q5QFK5Q5Q5Qr$   r   fused)r  r|  r  r  r   rI   r   r?  r\   NotImplementedErrorjoin)r  descriptive_namesall_originssourcesr  	source_fns         r"   get_fused_kernel_namer    sI   #M22KO++
 
%
 
 
 W&&	g	%	%! 	: 	:FyO++0AV[0P0P"K(9:2>	ilC00 :NN9Q<0000NN9Q<#8999W&&	o	-	-
 
&1
 
 
 "!G88WI'(((r$   c                x   t          |           }d |D             }t          j        t                    }t          j        t                    }d t	          |          r}d |D             }t	          |          dk    r^|d         j        t          d          s(i }t          j                  D ]
\  }}	|||	<   |_	        |
                    fd           |D ]}
d|
j        v rL|
j        d         ?t          |
j        d         j                  }||                             |
j                   d	|
j        v r9|
j        d	         d         d         }||                             |
j                   d
nd}|j         d| dd                    |                                           dd                    |                                           d}|j         dg}t'          |                                          D ]G\  }}|                    |j         d| dd                    t'          |                                HS|                    |j         d           |D ]3}	|                    |j         d|	                                            4|d                    |          fS )Nc                (    g | ]}|j         d k    |S r  r)  r  s     r"   r#   z'get_kernel_metadata.<locals>.<listcomp>  s$    WWW&):V:Vf:V:V:Vr$   c                    h | ]	}|j         
S r   )r  )r    rI  s     r"   	<setcomp>z&get_kernel_metadata.<locals>.<setcomp>#  s    999Q999r$   r%   r   )_inductor_kernel_metadata_node_to_idx_mapc                    j         |          S rV   )r  )rI  single_graphs    r"   rj  z%get_kernel_metadata.<locals>.<lambda>-  s    lTUVW r$   r{  r  	from_nodezTopologically SortedUnsorted z Source Nodes: [r   z], Original ATen: []z" Source node to ATen node mapping:z   z => z Graph fragment:
)r  collectionsdefaultdictr  r'   r  r   r   nodesr  sortr   r   r  r?  rv   commentr  keysr|  itemsformat_node)r  r  r  inductor_nodesfrom_node_dictoriginal_aten_dictunique_graphsnode_to_idx_mapidxrI  rJ  r  sort_strmetadatadetailed_metadataoriginal_noder  r  s                    @r"   get_kernel_metadatar    s/   #M22KWW;WWWN ,T22N$066
 L
> 99.999}"")!,2L<)TUU Y"$'(:;; - -FC),OA&&IXFWWWW       2 2di''DIo,F,Rdi0@AACs#**49555$)##)K(+A.C3&&ty111)5)A%%zH? 	C 	CX 	C 	CtyyATATAVAV7W7W 	C 	C99%7%<%<%>%>??	C 	C 	C  $OOOOP &~';';'='= > > 
 
u  PP=PPdiiu6N6NPP	
 	
 	
 	

   GO!E!E!EFFF 	O 	OA $$%M%MAMMOO%M%MNNNNTYY01111r$   initial_queueIterable[torch.fx.Node]Set[torch.fx.Node]c                    t          |           } t          |           }| r\|                                 }|j        D ]>}|r ||          r||vr*|                    |           |                     |           ?| \|S )zJReturns the set of nodes whose values depend on those within initial_queue)r  r  r(   r4  addr?  )r  skip_filterdominated_setrJ  users        r"   dominated_nodesr  O  s     ''M&&M
 +  ""J 	+ 	+D {{400 =((!!$'''$$T***  + r$   c                    dd l }ddlm fdfd|                                D             }fd| D             }t	           |j        g ||R            S )Nr   r%   r  c                    t          | j                  r | j                  S t          | j                  r | j                  S t          | j                  ot          | j                  S rV   )rI   	TensorBoxdata
StorageBoxIRNode	Pointwise)rI  r  is_unrealized_nodes    r"   r  z*gather_origins.<locals>.is_unrealized_nodeg  st    a&& 	.%%af---a'' 	.%%af---!RY''GJq",,G,GGr$   c                4    g | ]} |          |j         S r   r  )r    valr  s     r"   r#   z"gather_origins.<locals>.<listcomp>n  s-    WWWS?Q?QRU?V?VWS[WWWr$   c                4    g | ]} |          |j         S r   r  )r    r:  r  s     r"   r#   z"gather_origins.<locals>.<listcomp>o  s,    JJJ32D2DS2I2IJ3;JJJr$   )	itertoolsr  r  r   r  chain)rP   rE  r  kwarg_originsarg_originsr  r  s        @@r"   gather_originsr  b  s    H H H H H H XWWWFMMOOWWWMJJJJ$JJJKy<<m<<<===r$   exprc                H   t          | t          j                  r| j        S t          | t          j                  r-d                    t          t          | j                            S t          | t          j	                  r-d                    t          t          | j                            S t          | t          t          t          t          f          r=| j        j         dd                    t          t          | j                             dS t!          |           S )z
    Normal sympy str is very slow, this is a lot faster.  The result are
    somewhat worse, as it doesn't do as much simplification.  So don't
    use this for final codegen.
    z + z * (r   ))rI   rJ   Symbolrv   rK   r  rN   	sympy_strrP   Mulr6   r3   r4   r5   funcr\   r   )r  s    r"   r  r  s  s     $%% y$	"" 5zz#i33444$	"" 5zz#i33444$(HhGHH O)$NNtyyY	1J1J'K'KNNNNt99r$   c                    ddl m} t          j        r2t	          |j        dd           x}r|j        dk    rt          |           S t          j	                    S )Nr%   r  current_node
index_expr)
r  r  r;   compute_all_boundsr   interpreterr2  r9   r:   unknown)r   r  fx_nodes      r"   get_bounds_index_exprr    sh     	!%~tDDDW% Nl**5!!!"$$$r$   prefixr8   r  sympy.Symbolc                L    | t           j        k    sJ t          | |dd          S )9
    Used to generate an integer-nonnegative symbol.
    Tintegernonnegative)r8   SIZEr7   )r   r  s     r"   sympy_index_symbol_with_prefixr    s0     TY vsDdCCCCr$   c                6    | st           j        ot           j        S rV   )r;   debug_index_assertsassert_indirect_indexing)checks    r"   generate_assertr    s    /V/TV5TTr$   rv   c                L    | d         dk    sJ t          j        | dd          S )r  r   sTr  )rJ   r  rv   s    r"   sympy_index_symbolr    s.     7c>>>> <d====r$   replacementsDict[sympy.Expr, Any]c                    d t          j        |                               fd|                                D                       S )z
    When the passed replacement symbol v is a string, it is converted to a symbol with name v that
    have the same replaced expression integer and nonnegative properties.
    c                    t          | t          j                  sJ t          |t                    r!t          j        || j        | j                  S |S )Nr  )rI   rJ   r   r   r  ra   is_nonnegative)replacedreplacements     r"   	to_symbolzsympy_subs.<locals>.to_symbol  s\    (EJ/////k3'' 	< +$3    r$   c                0    i | ]\  }}| ||          S r   r   )r    krF   r  s      r"   r   zsympy_subs.<locals>.<dictcomp>  s)    ===1IIaOO===r$   )rJ   r   xreplacer  )r  r  r  s     @r"   
sympy_subsr    sY    	 	 	 =''====(:(:(<(<===  r$   r   c                
   t          | t          j                  pit          | t          j                  oOt	          d t          j        |                                 |                                           D                       S )Nc              3  4   K   | ]}t          |          V  d S rV   is_symbolicr   s     r"   r   zis_symbolic.<locals>.<genexpr>  s(      NN1ANNNNNNr$   )	rI   r   SymIntr<  r&  r  r  rt  stride)r   s    r"   r!  r!    sf    a&& 1el## 	ONN	!((**(M(MNNNNNr$   rP   c                 4    t          d | D                       S )Nc              3  4   K   | ]}t          |          V  d S rV   r   r#  s     r"   r   z"any_is_symbolic.<locals>.<genexpr>  s(      ,,!{1~~,,,,,,r$   r&  )rP   s    r"   any_is_symbolicr'    s    ,,t,,,,,,r$   c                   ddl m} h d}t          j                    r|                    h d           | j        j        D ]G}t          |j                  |v r|c S |j	        
                    d          x} ||          r|c S Hd S )Nr   )free_unbacked_symbols>	   aten._assert_scalaraten._local_scalar_denseaten.multinomial.defaultfbgemm.dense_to_jagged.default%fbgemm.jagged_to_padded_dense.default,aten._fused_moving_avg_obs_fq_helper.default7aten._fused_moving_avg_obs_fq_helper_functional.defaultrun_with_rng_staterun_and_save_rng_state>   aten.scatter.srcaten.scatter_add_aten.scatter.reduceaten.index_put.defaultaten.index_put_.defaultaten.scatter_reduce.twoaten.scatter_add.defaultaten.scatter_reduce_.twoaten.scatter.value_reduceaten.scatter_reduce.two_outaten._unsafe_index_put.default0aten._unsafe_masked_index_put_accumulate.defaultr  )%torch.fx.experimental.symbolic_shapesr)  r   $are_deterministic_algorithms_enabledupdater  r  r   r2  r   get)rK  r)  forbidden_setrJ  r  s        r"   %get_first_incompatible_cudagraph_noderD    s    KKKKKK  M 133 
  	
 	
 	
    t{},,KKK9=='''C49N9Ns9S9S4KKK4r$   c                $    t          |           d uS rV   )rD  )rK  s    r"   has_incompatible_cudagraph_opsrF    s    044D@@r$   rK  torch.fx.GraphModulec                    t          t          t          | j        j                                      }|j        dk    sJ |S )z$Get the output node from an FX graphrC  )nextiterreversedr  r  r  )rK  	last_nodes     r"   output_noderM     s<    T(28>223344I<8####r$   z	List[Any]_registered_cachesc                    t          | d          rt          | j                  st          |  d          t                              |            | S )zq
    Use this decorator to register any caches that should be cache_clear'd
    with fresh_inductor_cache().
    cache_clearz# does not have a cache_clear method)r   callablerP  AttributeErrorrN  r?  rf  s    r"   clear_on_fresh_inductor_cacherT  
  sY    
 3&& Jhs.G.G JHHHIIIc"""Jr$   c                 B    t           D ]} |                                  dS )z&
    Clear all registered caches.
    N)rN  rP  rS  s    r"   clear_inductor_cachesrV    s0     "   r$   c              #    K   t                       t          j        |          }	 t          j                            t          j        d|i          5  t          	                    d|           t          j
                            |d          t          j                            t          j        di          5  dV  t          | t
                    rqt          |           dk    s
J d            t          j
                                      r5t          j                  }|                     fd	|D                        ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |rt#          j        |           n4# t&          $ r' t(          s	 t                              d
|            Y nw xY wt                       dS # t                       w xY w)z
    Contextmanager that provides a clean tmp cachedir for inductor.

    Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
    generated with this cache instance.
    )dirTORCHINDUCTOR_CACHE_DIRzUsing inductor cache dir %stritonTRITON_CACHE_DIRNr   z!expected empty cache_entries dictc           	         i | ]D}d |v|t           j                            t           j                            |                    ES )z.lock)ospathgetsizer  )r    ftriton_cache_dirs     r"   r   z(fresh_inductor_cache.<locals>.<dictcomp>6  sP       $%#*!#3#3 !"27??27<<@PRS3T3T#U#U#3#3#3r$   z(on error, temporary cache dir kept at %s)rV  tempfilemkdtempr   patchdictr]  environr   r   r^  r  rI   r'   existslistdirrA  shutilrmtree	Exception_IS_WINDOWSwarning)cache_entriesrX  deleteinductor_cache_dirfilesra  s        @r"   fresh_inductor_cacherr    s|      !)c222 Z__J24FG
 
 	 	 II35GHHH!w||,>II.@BR-STT  mT22 
}--2224W222w~~&677  "
+; < <%,,   ).                  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$  	.M,---    	 KKBDVWWW	 	 	ss   ,F A(E*<BEE*E	E*E	E*F *E..F 1E.2F G .F=:G <F==G G 	List[int]c           	         | j         }t          t          |                     }t          t	          t          ||d                              S )NT)r  reverse)__getitem__r   r'   r  rK  r|  )seqgettera_rs      r"   argsortrz  J  s?    _F
C//C>>>??@@@r$   c                R    t          j        d|                                           S )Nr   rj   )r   r   element_sizer|  s    r"   get_dtype_sizer~  Q  s#    ;r'''44666r$   c                      e Zd ZU ded<   dS )LineContextr   contextN)r\   r]   r^   __annotations__r   r$   r"   r  r  V  s         LLLLLr$   r  c                      e Zd ZdZddZddZddZdd	Zd
 Zd Z	d Z
d Zd Zd ZddZddZddZd dZd!dZd Zd ZdS )"IndentedBuffer   r   c                "    g | _         || _        d S rV   )_lines_indent)r  initial_indents     r"   __init__zIndentedBuffer.__init__]  s    %r$   rg   )tuple[str, list[tuple[int, LineContext]]]c                   t                      }d}g }| j        D ]}t          |t                    r |            }|$n2t          |t                    r|                    ||j        f           Wt          |t                    sJ |                    |           |                    d           |d|	                    d          z   z  }|
                                |fS )Nr%   r  )r   r  rI   DeferredLineBaser  r?  r  r   writecountgetvalue)r  bufr   linemaplines        r"   getvaluewithlinemapz"IndentedBuffer.getvaluewithlinemapa  s    jjK 	& 	&D$ 011 tvv<  D+.. 4<0111dC(((((IIdOOOIIdOOOTZZ%%%%AA||~~w&&r$   r   c                4    |                                  \  }}|S rV   )r  )r  rF   r   s      r"   r  zIndentedBuffer.getvalues  s    ''))1r$   c                   t                      }| j        D ]}t          |t                    r |            }|$nt          |t                    r;t          |t
                    sJ |                    d          r|                    |d d                    |                    |           |                    d           |                                S )N\rp   r  )	r   r  rI   r  r  r   endswithr  r  )r  r  r  s      r"   getrawvaluezIndentedBuffer.getrawvaluew  s    jjK 	  	 D$ 011 tvv<  D+.. dC(((((}}T""  		$ss)$$$$		$		$||~~r$   c                8    | j                                          d S rV   )r  clearr  s    r"   r  zIndentedBuffer.clear  s    r$   c                *    t          | j                  S rV   )r   r  r  s    r"   __bool__zIndentedBuffer.__bool__  s    DK   r$   c                &    d| j         | j        z  z  S )Nr  )r  tabwidthr  s    r"   r   zIndentedBuffer.prefix  s    dlT]233r$   c                0    |                      d           d S )Nr  	writeliner  s    r"   newlinezIndentedBuffer.newline  s    tr$   c                   t          |t                    r| j                            |           d S t          |t                    rA| j                            |                    |                                                      d S |                                r2| j                            |                                  |            d S | j                            d           d S Nr  )rI   r  r  r?  r  with_prefixr   stripr  r  s     r"   r  zIndentedBuffer.writeline  s    dK(( 	#Kt$$$$$.// 	#Kt//>>?????ZZ\\ 	#K$++--77788888Kr"""""r$   c                :    |D ]}|                      |           d S rV   r  )r  linesr  s      r"   
writelineszIndentedBuffer.writelines  s0     	! 	!DNN4    	! 	!r$   r%   c                J     t           j         fd            } |            S )Nc               3     K   xj          z  c_         	 d V  xj          z  c_         d S # xj          z  c_         w xY wrV   r  )offsetr  s   r"   ctxz"IndentedBuffer.indent.<locals>.ctx  sQ      LLF"LL'&&s   + =)
contextlibcontextmanager)r  r  r  s   `` r"   indentzIndentedBuffer.indent  s=    		"	' 	' 	' 	' 	' 
#	"	' suur$   c                &    | xj         |z  c_         d S rV   r  r  r  s     r"   	do_indentzIndentedBuffer.do_indent      r$   c                &    | xj         |z  c_         d S rV   r  r  s     r"   do_unindentzIndentedBuffer.do_unindent  r  r$   Fc           	        t          |t                    rt          d          }|j        D ]X}t          |t                    sA|r?t          |t          |          t          |                                          z
            }Yt          j	        |          rd}|j        D ]b}t          |t                    r| j        
                    |           2t                              | |t          |          d                     cd S t          j        |          }|r|                                }|sd S |                                }|                    d          D ]}|                     |           d S )Ninfr   r  )rI   r  rh   r  r  minr'   lstripmathisinfr?  r  rW   textwrapdedentrstripr  )r  
other_coder  r  r  s        r"   splicezIndentedBuffer.splice  s   j.11 	%5\\F") I I!$44 I I TS5G5G)GHHFz&!! ") H HdK00 HK&&t,,,,",,T4F3FGGGG	H H "44J 1'..00
 #**,,J"((.. % %t$$$$% %r$   r  Callable[[Any], Any]c                b    t          | j                  }fd| j        D             |_        |S )Nr  c                &    g | ]} |          S r   r   )r    r  r  s     r"   r#   z&IndentedBuffer.map.<locals>.<listcomp>  s!    999Tdd4jj999r$   )r  r  r  )r  r  r   s    ` r"   rN   zIndentedBuffer.map  s7    DL9999999T[999

r$   c                P    t          |            d|                                  dS )Nr  r  )r   r  r  s    r"   __repr__zIndentedBuffer.__repr__  s'    t**11t}}1111r$   c                    | j         |j         k    sJ t          | j                   }|                    | j                   |                    |j                   |S )Nr  )r  r  r  r  )r  otherr   s      r"   __add__zIndentedBuffer.__add__  sV    |u},,,,DL999t{###u|$$$
r$   Nrz   )rg   r  )rg   r   rT   F)r  r  rg   r  )r\   r]   r^   r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  rN   r  r  r   r$   r"   r  r  Z  sE       H& & & &' ' ' '$      $  ! ! !4 4 4  # # #! ! !	 	 	 	      % % % %.   
2 2 2    r$   r  c                  &     e Zd Zd fdZd Z xZS )FakeIndentedBufferrg   r  c                H    t                                                       d S rV   )superr  )r  	__class__s    r"   r  zFakeIndentedBuffer.__init__  s    r$   c                j    |dk    rt                               | |          S t          d| d          )Nr  zTried to call self.z on FakeIndentedBuffer. This bufferis currently used on TritonTemplateKernel to prevent actualwrites to the body without explicitly specifying the body with`TritonTemplateKernel.set_subgraph_body(name)`)object__getattribute__r   )r  rv   s     r"   r  z#FakeIndentedBuffer.__getattribute__  sK    ;**4666=$ = = =
 
 	
r$   r  )r\   r]   r^   r  r  __classcell__)r  s   @r"   r  r    sL             
 
 
 
 
 
 
r$   r  c              #  ~   K   	 d V  | t           _        |t           _        d S # | t           _        |t           _        w xY wrV   )sysstdoutstderr)initial_stdoutinitial_stderrs     r"   restore_stdout_stderrr    sC      $#
#


 $
#
####s   " <c                  F    e Zd ZdZd ZddZddZd	 Zd
 Zd Z	d Z
d ZdS )r  z.A line that can be 'unwritten' at a later timec                @    |                                 sd}|| _        d S r  )r  r  r  s     r"   r  zDeferredLineBase.__init__  s"    zz|| 	D			r$   rg   Optional[str]c                    t           )zJReturns either self.line or None to indicate the line has been 'unwritten'r  r  s    r"   r  zDeferredLineBase.__call__      !!r$   r  r   c                    t           )z3Returns a new deferred line with the same conditionr  r  s     r"   	_new_linezDeferredLineBase._new_line  r  r$   c                >    |                      | | j                   S rV   r  r  )r  r   s     r"   r  zDeferredLineBase.with_prefix  s!    ~~444555r$   c                Z    |                      | j                                                  S rV   )r  r  r  r  s    r"   r  zDeferredLineBase.lstrip  s"    ~~di..00111r$   c                B    |                      | j        |                   S rV   r  )r  r   s     r"   rv  zDeferredLineBase.__getitem__	  s    ~~di.///r$   c                *    t          | j                  S rV   )r   r  r  s    r"   r  zDeferredLineBase.__bool__  s    DIr$   c                *    t          | j                  S rV   )r'   r  r  s    r"   __len__zDeferredLineBase.__len__  s    49~~r$   N)rg   r  )r  r   rg   r  )r\   r]   r^   r_   r  r  r  r  r  rv  r  r  r   r$   r"   r  r    s        88  
" " " "" " " "6 6 62 2 20 0 0      r$   r  c                    d}t           j                            |           j        }||k     r!t                              d||d           dS dS )ND   z,Not enough SMs to use max_autotune_gemm mode)min_sms	avail_sms)extraFT)r   r   get_device_propertiesmulti_processor_countr   rm  )r   r  r  s      r"   
is_big_gpur    s_    G
0077MI7:%I>> 	 	
 	
 	
 u4r$   c                 2    t           j        pt           j        S rV   )r;   max_autotunemax_autotune_gemmr   r$   r"   use_max_autotuner     s    :&"::r$   allowed_layout_dtypesList[torch.dtype]c                    t                      o3| j        j        dk    o#| j        |v ot	          | j        j        pd          S )Nr   r   )r  rk   r   rj   r  r   )layoutr  s     r"   _use_template_for_cudar  $  sN     	1M&(	1L11	1 v}*/a00	r$   backendc                    |                                  d t          j                                                             d          D             v S )Nc                6    g | ]}|                                 S r   r  r   s     r"   r#   z)_use_autotune_backend.<locals>.<listcomp>.  -       		  r$   ,)upperr;   max_autotune_gemm_backendsr  r  s    r"   _use_autotune_backendr  -  P    ==??  !<BBDDJJ3OO    r$   c                    |                                  d t          j                                                             d          D             v S )Nc                6    g | ]}|                                 S r   r  r   s     r"   r#   z._use_conv_autotune_backend.<locals>.<listcomp>4  r  r$   r  )r  r;   max_autotune_conv_backendsr  r  s    r"   _use_conv_autotune_backendr  3  r	  r$   F)enable_int32enable_float8c               |   ddl m}m} t          j        t          j        t          j        g}|r.t          j        t          j        t          j        t          j        g}|r+|                    t          j	        t          j
        g           t          | |          o$t          d          o || j        |j                  S )Nr%   )BackendFeaturehas_backend_featureTRITON)codegen.commonr  r  r   r   r   r   r   extendr   r   r  r  rk   TRITON_TEMPLATES)r  r  r  r  r  layout_dtypess         r"   use_triton_templater  9  s    CCCCCCCC]ENEMBM Tu{S Ge153DEFFFv}55 	P!(++	P~/NOOr$   c                   ddl m} |j        j                            ||z  |z  d          }|dk    s|t
          j        j        k     rdS ddlm	} t          j        j        rdS t          j        t          j        t          j        t          j        g}t#          | |          ot%          d          }|r& |            st&                              d	           dS |S )
Nr%   r  rp   fallbackr   F)try_import_cutlassCUTLASSzFailed to import CUTLASS lib. Please check whether _inductor.config.cuda.cutlass_dir is set correctly. Skipping CUTLASS backend for now.)r  r  r  r  	size_hintr;   r   cutlass_backend_min_gemm_sizecodegen.cuda.cutlass_utilsr  r   versionhipr   r   r   r   r  r  r   rm  )	r  mrI  r  r  	gemm_sizer  r  r   s	            r"   use_cutlass_templater%  H  s     **1q519r*BBIA~~V[%NNNu>>>>>> } u]ENEM5;OM
 
7
7 <Q= =C  !!## 	KK4  
 5Jr$   c                J    t           j                            |           j        S rV   )r   r   r  gcnArchNamerk   s    r"   _rocm_native_device_arch_namer)  d  s    :++F33??r$   c                     	 dd l } ddlm}m} ddlm} t          j                            | j	                  }n%# t          $ r d }d } G d d          }d }Y nw xY w||||fS )Nr   )gen_ops_librarygen_ops_preselected)CKGemmOperationc                     g S rV   r   r   r$   r"   r+  z*try_import_ck_lib.<locals>.gen_ops_libraryx      Ir$   c                     g S rV   r   r   r$   r"   r,  z.try_import_ck_lib.<locals>.gen_ops_preselected{  r/  r$   c                      e Zd ZdS )*try_import_ck_lib.<locals>.CKGemmOperationN)r\   r]   r^   r   r$   r"   r-  r2  ~  s        Dr$   r-  )ck4inductor(ck4inductor.universal_gemm.gen_instancesr+  r,  ck4inductor.universal_gemm.opr-  r]  r^  dirname__file__r   )r3  r+  r,  r-  package_dirnames        r"   try_import_ck_libr9  i  s    	
 	
 	
 	
 	
 	
 	
 	
	
 	
 	
 	
 	
 	
 '//+*>??   	 	 		 	 		 	 	 	 	 	 	 	  O-@/QQs   69 AAc                b  
 t                      sdS t          d          sdS t          j        j        sdS | j        j        dk    sdS t          | j                  }d t          j	        j
        D             p|                    d          d         |i

fd
                                t          j	        j        z  D             }|sdS | j        t          j        t          j        fvrdS dd	lm} |j        j                            ||z  |z  d
          }|dk    rdS t-                      \  }}	}	}	|st.                              d           dS t          j	        j        st.                              d           dS |t          j	        j        k    rt.                              d           dS dS )NFCKr   c                F    i | ]}|                     d           d         |S ):r   )r  )r    r  s     r"   r   z#use_ck_template.<locals>.<dictcomp>  s(    DDDaqwws||ADDDr$   r=  r   c                     g | ]
}|         S r   r   )r    r  requested_archss     r"   r#   z#use_ck_template.<locals>.<listcomp>  s.     ! ! ! 	! ! !r$   r%   r  rp   r  z,Please pip install Composable Kernel packagez,Please set TORCHINDUCTOR_CK_DIR env variablezInvalid path to CK libraryT)r  r  r   r!  r"  rk   r   r)  r;   rocmarchr  r  ck_supported_archrj   r   r   r  r  r  r  r  r9  r   rm  ck_dir)r  r#  rI  r  native_archrequested_supported_archsr  r$  ck_package_dirnamer   r?  s             @r"   use_ck_templaterG    s    u && u= u=''u 0>>KDD6;3CDDD #q!;IO! ! ! ! %%''&+*GG! ! ! % u|EM5>:::u  **1q519r*BBIA~~u #4"5"51a BCCCu; BCCCuV[///0111u4r$   c                >    t                      o| j        j        dk    S rN  )r  rk   r   )r  s    r"   _use_template_for_cpurI    s    =&-"4"==r$   c                <   ddl m} ddlm} ddlm} ddlm} t          |           rt          d          sdS t          j        j        sdS |                                t          j        k    }t          j        t          j        t          j        t          j        g}	 ||||r| j        nd |          \  }
}}} }}t)          ||f          rdS t+          ||j                  r|                                } ||                                          \  }} |d	|
|||                                |                                |t1                      
          }d }| j        |	v o7|d uo3 ||          o(t+          ||j                  o|                                S )Nr%   r  )create_micro_gemm)*get_gemm_template_output_and_compute_dtype)mm_argsCPPF)	out_dtypemat2_transposed
micro_gemm)input_dtypeinput2_dtypeoutput_dtypenum_threadsc                f    |                                   |                                 d         dk    S )Nrp   r%   )freeze_layout
get_strider!   s    r"   is_last_dim_stride1z9use_cpp_packed_gemm_template.<locals>.is_last_dim_stride1  s*    	||~~b!Q&&r$   )r  r  codegen.cpp_micro_gemmrK  codegen.cpp_utilsrL  kernel.mm_commonrM  rI  r  r;   cppweight_prepack	get_dtyper   r   r   r   halfrj   has_free_symbolsrI   BaseViewunwrap_viewparallel_num_threadsr  is_module_buffer)r  mat1mat2rP  r  rK  rL  rM  	int8_gemmr  r#  rI  r  rT  r   rQ  rZ  s                    r"   use_cpp_packed_gemm_templaterj    s   999999MMMMMM)))))) (( 0Ee0L0L u:$ u  EK/I]ENEJLM")'"+5&,,'	# # #Aq!VT4 A u$$$ "!!@@AQAQRROL!""			NN$$^^%%!(**	 	 	J' ' '
 	% 	$d"	$%%	$ tR]++	$ !!##r$   c                 >    t                       pt          d          S )NATEN)r  r  r   r$   r"   use_aten_gemm_kernelsrm    s    !!!B%:6%B%BBr$   c                  N    e Zd ZU  ej        d          Zded<   d
dZd Zd Z	d	S )DebugDirManagerr   r   prev_debug_namerg   r  c                B    t          t          j                  | _        d S rV   )rI  ro  counterr   r  s    r"   r  zDebugDirManager.__init__  s    .//r$   c                    t           j        j        j        | _        | j         d| j         | _        | j        t           j        j        _        d S )N_tmp_)r   _dynamor;   debug_dir_rootrp  r   new_namer  s    r"   	__enter__zDebugDirManager.__enter__  sA    $}3B/??dg??.2m+++r$   c                n    t          j        | j                   | j        t          j        j        _        d S rV   )ri  rj  rw  rp  r   ru  r;   rv  )r  rP   s     r"   __exit__zDebugDirManager.__exit__  s*    dm$$$.2.B+++r$   Nr  )
r\   r]   r^   r  r  rr  r  r  rx  rz  r   r$   r"   ro  ro    sj         ioa  G0 0 0 0< < <
C C C C Cr$   ro  c                    ddl m} g dfd}t          j                            |d|          5  t
          j                                          | |i |}d d d            n# 1 swxY w Y   |fS )Nr%   GraphLoweringcoder   c                2                         |            d S rV   r?  r~  source_codess    r"   save_output_codez*run_and_get_code.<locals>.save_output_code      D!!!!!r$   r  r~  r   r  r}  r   rd  r  r   ru  reset)re   rP   rE  r}  r  rX  r  s         @r"   run_and_get_coder    s    $$$$$$ L" " " " " " 
		=*<>N	O	O % %T$V$$% % % % % % % % % % % % % % % <s   'A$$A(+A(c                ,      fd}t          |          S )Nc                 h                 } |                                                                   | S rV   )r   backward)rX  re   s    r"   run_with_backwardz1run_fw_bw_and_get_code.<locals>.run_with_backward  s-    

r$   )r  )re   r  s   ` r"   run_fw_bw_and_get_coder    s-        
 -...r$   c                b   ddl m} g dfddfd}t          j                            |d	|          5  t          j                            |d
          5  t
          j                                          | |i |}ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   S )zLGet the inductor-generated code, but skip any actual compilation or running.r%   r|  r~  r   c                2                         |            d S rV   r  r  s    r"   r  z"get_code.<locals>.save_output_code%  r  r$   r  r}  c                     G d d          }| j         r|                                 n|                                 \  }} |            |            S )Nc                       e Zd ZdZddZd ZdS )@get_code.<locals>.patched_compile_to_module.<locals>.DummyModulez4This is empty to replace the generated triton modulerg   r  c                    d S rV   r   r  s    r"   r  zIget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.__init__,  s    r$   c                    d S rV   r   r  s      r"   callzEget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.call/  s    r$   Nr  )r\   r]   r^   r_   r  r  r   r$   r"   DummyModuler  )  s=        FF       r$   r  )cpp_wrappercodegen_with_cpp_wrappercodegen)r  r  r~  r   r  s       r"   patched_compile_to_modulez+get_code.<locals>.patched_compile_to_module(  sy    	 	 	 	 	 	 	 	 04/?SD))+++T\\^^ 	a
 	{}}r$   compile_to_moduler  Nr  )r  r}  r  )re   rP   rE  r}  r  r   r  r  s         @@r"   get_coder    sr   $$$$$$ L" " " " " "     ( 
		*,E
 
    z(:<LMM    	B                                                            s5   "B$'BB$B	B$B	B$$B(+B(c                    t          | g|R i |}dt          |          cxk    rdk    sn J dt          |                       |d         S Nr%      z%expected one or two code outputs got r   )r  r'   )re   rP   rE  r  s       r"   get_triton_coder  F  ss    B000000L 	
S####!#####Bs</@/@BB 	$##?r$   c                    t          | g|R i |\  }}dt          |          cxk    rdk    sn J dt          |                       |d         S r  )r  r'   )re   rP   rE  r   r  s        r"   run_and_get_triton_coder  O  sw    &r;D;;;F;;OA| 	
S####!#####Bs</@/@BB 	$##?r$   c                    ddl m} ddlm |j        g fd}t
          j                            |d|          5   | |i |}d d d            n# 1 swxY w Y   |fS )Nr   )CompiledFxGraphr|  c                 v     | i | | d         }t          |          sJ                     |           d S )Nr  )rI   r?  )rP   rE  r  r}  graph_lowerings	real_inits      r"   	fake_initz-run_and_get_graph_lowering.<locals>.fake_init_  sQ    	4"6"""Q%/////u%%%%%r$   r  )torch._inductor.codecacher  torch._inductor.graphr}  r  r   rd  r  )	re   rP   rE  r  r  rX  r}  r  r  s	         @@@r"   run_and_get_graph_loweringr  X  s    999999333333(IO& & & & & & & 
		?J		B	B % %T$V$$% % % % % % % % % % % % % % % ?""s   	AAAc              #     K   ddl m} |j        |          }	 t          j        ||          |j        | <   dV  ||j        | <   dS # ||j        | <   w xY w)z
    Override the lowering of aten_op with override_fn.
    The first argument of override_fn is the original lowering fn.
    r   )loweringN)torch._inductorr  	loweringsr   partial)aten_opoverride_fnr  orig_fns       r"   override_loweringr  k  s{       )((((( )G.&/&7W&M&M7#&-7###g7#----s   !A Ac                |     ddl m} |j         fd}t          j        j                            |d|          S )zr
    Add hook functions to be called at the beginning and end of Scheduler.__init__.
    Used for unit tests.
    r   )	Schedulerc                T     | |            | |          }r | |           |S rV   r   )	schedulerr  outr  post_fnpre_fns      r"   r  z(add_scheduler_init_hook.<locals>.wrapper  sE    y%   gi'' 	&GIu%%%
r$   r  )torch._inductor.schedulerr  r  unittestr   rd  r  )r  r  r  r  r  s   ``  @r"   add_scheduler_init_hookr  {  sc    
 433333 G       =%%iWEEEr$   c                    t           j        rt                              |            dS t                              |            dS )z
    Warnings that will be actionable for PyTorch developers, but not
    end users.  Allows us to easily disable them in stable releases but
    keep them on for nightly builds.
    N)r;   developer_warningsr   rm  info)msgs    r"   developer_warningr    s:       Cr$   c                    	 t           j                            d          } | dz   t          t           j                  k     rZt          t           j        | dz                      dk    r4t           j        | dz            d         dk    rt           j        | dz            S n# t          $ r Y nw xY wt           j        D ]0}|                    d          r|t          d          d         c S 1dS )a  
    An experimental API used only when config.benchmark_kernel is true.

    The benchmark name is only available at codegen time. So we can not
    directly call it in benchmark_all_kernels which is run after codegen.

    The function assumes the argument after --only is the benchmark name.
    It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
    scripts, this function may return None.

    There are 2 flavors of --only argument we need handle:
    1. --only model_name
    2. --only=model_name
    z--onlyr%   r   -z--only=N)r  argvr   r'   
ValueError
startswith)r  r:  s     r"   get_benchmark_namer    s    	hnnX&&!Gc#(mm##CHS1W%&&**q!!$++8C!G$$    x ) )>>)$$ 	)s9~~''((((	)) )s   BB 
B)(B)c                4    t          d | D                       S )Nc              3  "   K   | ]
}|d k    V  dS r%   Nr   r   s     r"   r   zis_ones.<locals>.<genexpr>  &      %%!qAv%%%%%%r$   rM   r  s    r"   is_onesr        %%u%%%%%%r$   c                4    t          d | D                       S )Nc              3  "   K   | ]
}|d k    V  dS )r   Nr   r   s     r"   r   zis_zeros.<locals>.<genexpr>  r  r$   r  r  s    r"   is_zerosr    r  r$   c                4    t          d | D                       S )Nc              3     K   | ];}t          |t          j                  |j        t          j        d           k    V  <dS )r   N)rI   r   r<  rk   )r    items     r"   r   z is_cpu_device.<locals>.<genexpr>  sY        dEL))u|E***     r$   r  )inputss    r"   is_cpu_devicer    s0           r$   r  torch.dtypec                    t          | t          j                  s
J d            | j        rt          j        S t          j        S )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)rI   rJ   r   ra   r   r   r   )r  s    r"   get_sympy_Expr_dtyper    sS    UZ  B BAB B B ~ {}r$   c              /     K   | r5t          j        j        |i |5 }|V  d d d            d S # 1 swxY w Y   d S d V  d S rV   )r   r   r   )should_profilerP   rE  r   s       r"   maybe_profiler    s       ^#T4V44 	GGG	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   -11c                 Z    t           j        j        } | dk     rt          j                    } | S r   )r;   r^  threadsr   get_num_threads)r  s    r"   re  re    s(    j G{{'))Nr$   c                   ddl m}m} | t          j        t          j        t          j        fv sJ t          j        |          j	        
                    d          r}ddlm}  |            }| t          j        t          j        fv r || |          S t          j        j        j        j        r |t          j        |          S  |t          j        |          S | t          j        t          j        fv r ||           S t          j        j        j        j        r |t          j                  S  |t          j                  S )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops
clock_rate)max_clock_rate)triton.testingr  r  r   r   r   r   inspect	signature
parametersrB  torch._utils_internalr  backendsr   matmul
allow_tf32)rj   r  r  r  sm_clocks        r"   get_device_tflopsr    s?   MMMMMMMMU]ENEMBBBBB,--8<<\JJ 6888888!>##U]EN333,,UH===>%0 	@,,U]HEEE&&u}h???U]EN333,,U333>%0 	6,,U];;;&&u}555r$   c                 "    ddl m}   |             S )Nr   get_dram_gbps)r  r  r  s    r"   get_gpu_dram_gbpsr    s     ,,,,,,=??r$   c                 t    ddl m}  | j        j                            d                              dd          S )Nr   drivermax_shared_mem)triton.runtimer  activeutilsr  rB  r  s    r"   get_gpu_shared_memoryr    s>    %%%%%%=44Q77;;<LaPPPr$   c                ,    |                      d          S )Nwelford)r  reduction_types    r"   is_welford_reductionr    s    $$Y///r$   c                (    t          |           rdndS )N   r%   )r  r   s    r"   reduction_num_outputsr    s    $^44;11!;r$   c                 0    t          j                    dk    S )NLinux)platformsystemr   r$   r"   is_linuxr
    s    ?''r$   c                 "    t           j        dk    S )Nr=   )r  r  r   r$   r"   
is_windowsr    s    <7""r$   itrIterable[Any]c                4    t          d | D                       S )Nc              3  Z   K   | ]&}t          |t          j                  o|j         V  'd S rV   )rI   rJ   r   	is_numberr   s     r"   r   z#has_free_symbols.<locals>.<genexpr>  s7      JJz!UZ((<_JJJJJJr$   r&  )r  s    r"   rb  rb    s    JJcJJJJJJr$   c                    ddl m} | D ]<}t          ||j                  ret	          |j                                                  s;t          |j        d          r)t	          |j                                                  r dS }t          ||j	        |j
        |j        f          rit          |d          rt          |d          sJ t	          |                                          s!t	          |                                          r dS t          ||j                  st          dt          |                     dS )Nr%   r  rX  Tget_sizezunexpected type for is_dynamic F)r  r  rI   r  rb  r  r  r   rX  r  rc  ComputedBufferr  	TypeErrorr   )rP   r  ts      r"   
is_dynamicr     sN    I Ia&& 	I 1 122 --2B16CTCTCVCV2W2W ttBM2;8IJKK 	I1j))Fga.F.FFFF

-- 1A!,,..1Q1Q ttAry)) 	IGd1ggGGHHH5r$   c                      e Zd ZdZdZdS )PlaceholderKERNEL_NAMEDESCRIPTIVE_NAMEN)r\   r]   r^   r  r  r   r$   r"   r  r  6  s          K *r$   r  c                   ddl m} t          j        ddd          5 }t	          j                    }t	          j                    } t          |t          |                    j        |  t          d|j
         |	           t          |j
        |	           t          j                    }t          ||t          j        j                  5   | |j
                   d d d            n# 1 swxY w Y   t          j                    |z
  }	 ||j
                   |j
                                         |                                 t          d
|j
         |	           t          |j
        |	           |                                |                                k    }
t(                              d||j        |
|	           d d d            d S # 1 swxY w Y   d S )Nr%   )stable_topological_sortwzutf-8F)modeencodingro  )rK  	fake_modezBefore:
)filezAfter:
zZ%s, save before/after graph to %s, graph before/after are the same = %s, time elapsed = %s)pattern_matcherr  rb  NamedTemporaryFileior   r1   r-   	propagater`  r  r   nowr0   r;   tracelog_url_for_graph_xformlint	recompiler  r   r  rv   )r  rK  inpr  r  r`  	before_ioafter_io
start_timetime_elapsedr  s              r"   pass_execution_and_saver1  @  sI   888888		$
 
 
 
 
KMM	;==C	R#3C#8#8999CSII$"($$1----bhY''''\^^
#BV\-QRR 	 	DNNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	|~~
2)))

###!,,,,bhX&&&&  H$5$5$7$77hF	
 	
 	
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s7   B/GC*G*C.	.G1C.	2CGGGc                X    ddl m} t          |           |j        k    o|d u p| j        |u S Nr%   r  )r  r  r   _CollectiveKernelop_overloadrJ  r  r  s      r"   is_collectiver7  b  s?    ::--X2:3WAQUWAWXr$   c                >    ddl m} t          |           |j        k    S r3  )r  r  r   _WaitKernel)rJ  r  s     r"   is_waitr:  h  s'    ::''r$   c                    ddl m}m} t          | |          sJ t          | |          rt	          d | j        D                       S t          | j                  S )Nr   BaseSchedulerNodeGroupedSchedulerNodec              3  4   K   | ]}t          |          V  d S rV   )contains_collectiver   s     r"   r   z&contains_collective.<locals>.<genexpr>s  s+      @@a&q))@@@@@@r$   )r  r=  r>  rI   r&  snodesr7  rJ  snoder=  r>  s      r"   r@  r@  n  ss    QQQQQQQQe./////%-.. )@@5<@@@@@@UZ(((r$   c                    ddl m}m} t          | |          sJ t          | |          rt	          d | j        D                       S t          | j                  S )Nr   r<  c              3  4   K   | ]}t          |          V  d S rV   )contains_waitr   s     r"   r   z contains_wait.<locals>.<genexpr>}  s*      ::=##::::::r$   )r  r=  r>  rI   r&  rA  r:  rJ  rB  s      r"   rF  rF  x  ss    QQQQQQQQe./////%-.. #::U\::::::uz"""r$   c                    ddl m} t          |t          j        j                  r|h}t          | |j                  o| j        |v S r3  )r  r  rI   r   r$  r%  FallbackKernelr5  r6  s      r"   is_fallback_oprI    sR    "ej+,, TdB-..I43Cr3IIr$   c                L    |||          j                                                  S rV   )defining_oprx  )buf_namename_to_bufname_to_fused_nodes      r"   buf_name_to_fused_snoderO    s!    k(3?HHJJKKr$   c                    |r ||           rd S |                     |            | j        D ]1}t          |j        ||          }||v rt	          |||||           2d S )Ncriteria_cb)r  unmet_dependenciesrO  rv   find_recursive_deps_of_node)rC  collected_node_setrM  rN  rR  depdefining_op_for_deps          r"   rT  rT    s      {{5)) 5!!!' 
 
5Hk#5
 
 "444##	
 	
 	
 	
 	

 
r$   c           	        |r ||           rd S |                     |            |                                 D ]}|j        D ]}}|j        J |j                                        dk    r)|j                                        |vrE||j                                                 }||v rit          |||||           ~d S )NOUTPUTrQ  )r  get_outputsr4  rJ  rx  find_recursive_users_of_node)rC  rU  rM  rN  rR  or  user_ops           r"   r[  r[    s
     {{5)) 5!!!    G 	 	D9(((y!!##x//y!!##+===(););)=)=>G,,,(""'    	 r$   dynamo_gm_num_inputsaot_fw_gm_num_inputsc                    t           j        j        j        rdnd}t           j        j        j        r%t           j        j                                        sdS || z
  |z
  S )zaComputes the number of inputs to the aot fw graph which have fixed addresses (params and buffers)r  r   )r   
_functorchr;   functionalize_rng_opsru  inline_inbuilt_nn_modulesr  is_parameter_freezing)r^  r_  num_rng_seed_offset_inputss      r"   num_fw_fixed_argumentsrf    sb     $:A  	6#99;; q"669SSSr$   fx_gc                
   d }d}g }| j         j        D ]2}|j        dk    r% ||          r|                    |           |dz  }3|t	          t          t          |                              k    sJ t          |          S )z>
    Infers which inputs are static for a backwards graph
    c                8    d| j         vod| j         vod| j         vS )Ntangentsbwd_seedbwd_base_offsetr  rY  s    r"   is_saved_tensorz'count_tangents.<locals>.is_saved_tensor  s/    af$ 0!&(0!/	
r$   r   r@  r%   )r  r  r  r?  r  r   r'   )rg  rm  	arg_countstatic_arg_idxsrI  s        r"   count_tangentsrp    s    

 
 
 IOZ  4=  q!! 2&&y111NId5_)=)=#>#>??????r$   c                  6    e Zd ZU ded<   d Zed             ZdS )	BoxedBoolr   rZ   c                    | j         S rV   )rZ   r  s    r"   r  zBoxedBool.__bool__  s
    zr$   c                B    t          | t                    r	d| _        | S dS NF)rI   rr  rZ   rS  s    r"   disablezBoxedBool.disable  s%    c9%% 	CIJur$   N)r\   r]   r^   r  r  r  rv  r   r$   r"   rr  rr    sI         KKK     \  r$   rr  c              #      K   ddl m} |j         fd}t          j        j                            |d|          5  d V  d d d            d S # 1 swxY w Y   d S )Nr%   )WrapperCodeGenc                L                         |            | |||g|R i |S rV   r  )r  rv   kernel_coder  rP   rE  kernel_listorig_define_kernels         r"   new_define_kernelz2collect_defined_kernels.<locals>.new_define_kernel  s?    ;'''!!'4hXXXXQWXXXr$   define_kernel)codegen.wrapperrx  r~  r  r   rd  r  )r{  rx  r}  r|  s   `  @r"   collect_defined_kernelsr    s      //////'5Y Y Y Y Y Y
 
		#	#NOEV	W	W                   s   AAAc                    | dz   S )N__original__r   r  s    r"    get_cloned_parameter_buffer_namer    s    .  r$   c                L    t          | t                    s| 
J |             | dv S )N)r   r   )rI   r   r(  s    r"   is_gpur    s+    fc""<fnnfnnn_$$r$   c                N    t          | t                    sJ t          |           S rV   )rI   r   r  r(  s    r"   device_need_guardr    s$    fc"""""&>>r$   c                L    | t           j        t           j        t           j        hv S rV   )r   r   r   r   r|  s    r"   ,needs_fallback_due_to_atomic_add_limitationsr    s    U[%*en===r$   r5  c                *   | j         t          j        j        j        t          j        j        j        fv r|dS | j         t          j        j        j        k    rdnd}|d |hvp|ot          |          ot          |          p| j         t          j        j        j        k    oA|dk    o;|o9|dk    o3t          j
        j        o"t          j
        j        pt                      dk    p2||k    o|t          j        t          j        hv pt          j                    S )NFr  r   r   r%   )overloadpacketr   r   atenscatter_reduce_scatter_reducescatter_r  r  r;   r^  fallback_scatter_reduce_sumdynamic_threadsre  r   r   r@  )r5  r  
self_dtype	src_dtypesrc_device_typesrc_is_tensor	reduce_tys          r"   use_scatter_fallbackr    s=    	"IN*EIN,IJ	K 	K"u +uy~/FFFE 
 	tY// 	8 H''H<YGG		8 &%).*HH L%'LL  5(L 
6	L
 +J/C/E/E/J	8 i'SJ5:u{:S,S	8 577!r$   c                   ddl m}m} ddlm} t          dt          |            d           t          |           D ]!\  }}t          d|dd           ||u rt          d	           .||u rt          d
           Bt          ||          r|	                                }t          |rdnd d           |r*|j
        J t          d|j
        j        j                    t          d           |j        j        D ]}t          |           t          d           |j        j        D ]}t          |           t!          dt#          |                     dS )z
    An API that can be used in pdb to dump a node_schedule.
    Right mainly dump the read/write dependencies but can add more as needed.
    r   )DisableReductionEnableReduction)SchedulerNodezNode schedule with z nodesr  3r=  zenable reductionzdisable reductionredpwz scheduler nodeNzoriginal reduction hint zReadDep:z	WriteDep:zUnrecognized node type: )torch._inductor.codegen.simdr  r  r  r  r`  r'   r   rI   is_reductionrJ  r  reduction_hintread_writesreadswritesr   r   )r  r  r  r  r  rJ  is_redrV  s           r"   dump_node_scheduler  =  s   
 ONNNNNNN777777	
:M 2 2
:
:
:;;;}-- H H	Tl#llll?""$%%%%%%%%&&&&m,, 	H&&((Ff.UU$???@@@ Ry,,,P1NPPQQQ*'-  c



+'.  c



 F$t**FFGGG'H Hr$   r   torch.Tensorc                    ddl m}  ||                                 t          | j                  z  t
          z  dk              S )Nr   )statically_known_true)r?  r  storage_offsetr~  rj   GPU_ALIGN_BYTES)r   r  s     r"   tensor_is_alignedr  \  sU     LKKKKK  				 	 >&,#?#?	??RVWW  r$   example_inputc                n    t          | j        j                  sdS t          j        pt          |           S ru  )r  rk   r   r;   assume_aligned_inputsr  )r  s    r"   should_assume_input_alignedr  j  s6     -&+,, u'K+<]+K+KKr$   c                     t           j        j                                        } | st	          j                    S | j        j        }|st	          j                    S |                                S rV   )	r   _guardsTracingContexttry_getr  nullcontextr!  r  suppress_guards)tracing_contextr  s     r"   #maybe_get_suppress_shape_guards_ctxr  s  sg    
 m2::<<O (%'''  )3I (%'''$$&&&r$   c                   t           j        j                            t          dd          5  t
          j                                         dd l}dd l	} |j
                    } |j        |          }ddlm} |                    |           |j        }|                    |j                    | |i |}	|                                }
|                    |           |                    |           d d d            n# 1 swxY w Y   |	|
fS )Nr   Tr   )output_code_log)r  r   rd  r  r;   r   ru  r  r%  loggingr   StreamHandlerr  r  
addHandlerlevelsetLevelDEBUGr  removeHandler)re   rP   rE  r%  r  log_capture_stringchr  
prev_levelrX  r  s              r"   run_and_get_cpp_coder    sZ    
		#	#FGT	:	: * *			(R[]]"W"#566======""2&&&$*
  ///T$V$$''))  ,,,%%b)))* * * * * * * * * * * * * * *  19s   CD  DDr  List[torch.Tensor]c                    d }t          |           }||j        S | D ]*}t          |t          j                  r|j        j        c S +d S rV   )r-   r  rI   r   r"  rJ  )r  r  r!  inputs       r"   shape_env_from_inputsr    sg    I ((I ""  ( (eU\** 	(:''''	( 4r$    Callable[[List[InputType]], Any]inputs_to_checkSequence[int]c                B     t                    dk    r S d fd}|S )Nr   
new_inputsList[InputType]c                :    t          |             |           S rV   )copy_misaligned_inputs)r  r  rO  s    r"   runz)align_inputs_from_check_idxs.<locals>.run  s#    z?;;;uZ   r$   )r  r  )r'   )rO  r  r  s   `` r"   align_inputs_from_check_idxsr    sH     ?q  ! ! ! ! ! ! ! Jr$   r!   c                d   t          d t          |                                 |                                           D                       dz   }t	          j        | |fd                                          }t	          j        ||                                 |                                           S )Nc              3  ,   K   | ]\  }}|d z
  |z  V  dS r  r   )r    shaper#  s      r"   r   z)clone_preserve_strides.<locals>.<genexpr>  s/      PP]UFUQY& PPPPPPr$   r%   rT   )r   r   rt  r#  r   
as_stridedclone)r!   needed_sizebuffers      r"   clone_preserve_stridesr    s    PPc!&&((AHHJJ6O6OPPPPPSTT  a+66<<>>FFAFFHHahhjj999r$   r  r  check_inputs_idxsr  c                    |D ]T}| |         }t          |t          j                  sJ |                                t          z  rt          |          | |<   Ud S rV   )rI   r   r<  data_ptr	ALIGNMENTr  )r  r  r{   _inps       r"   r  r    se      9 9!}$-----==??Y& 	92488JqM	9 9r$   static_input_idxsc                   g }|D ]Y}| |         }t          |t          j                  r5|                                t          z  dk    r|                    |           Zt          |          t          |          k    r|S |S )z[
    We require all inputs to be aligned, so introduce a copy for any
    that aren't.
    r   )rI   r   r<  r  r  r?  r'   )r  r  aligned_static_input_idxsr  r  s        r"   remove_unaligned_input_idxsr    s     !#  2 2seU\** 	20@0@90LQR/R/R%,,S111
$%%->)?)???((r$   c                n   t           j        j                                        }||j        t          |j                  dk    sJ t          |           |j        D ]V}||j                            d            |j                            t          fd|D                                  Sd S d S d S )Nr   c              3  d   K   | ]*}                     |          nt          |          V  +d S rV   )evaluate_symexprrW   )r    r   r  s     r"   r   z5set_tracing_context_output_strides.<locals>.<genexpr>  s[           )4 &66q999!$Q	     r$   )	r   r  r  r  output_stridesr'   r  r?  rA  )rV  compiled_graphr  exprsr  s       @r"   "set_tracing_context_output_stridesr    s    m*2244Gw5A7)**a////).99	#2 	 	E}&--d3333&--     "'    	 	 	 	 AA	 	r$   )rF   rG   )rc   rd   )re   rf   rg   rh   )rg   r   )rk   r   rg   r   )r   r   rg   r   )r   r   r   r   rg   r   )r  r  rg   r  )r  r  rg   r  )r  r  rV   )r+  r,  )r   )rk   r   )r%   r   )rO  rP  rQ  rW   rk   r   rg   rh   )r   r[  r[  r\  r   )rf  r   rg  r   )rf  r   rm  rn  )rg   rW   )re   r  rg   r  )r  r  rg   r  )r  rG   rg   r   )r   r8   r  rW   rg   r  )rv   r   rg   r  )r  rG   r  r  rg   rG   )r   r   rg   r   )rP   r   rg   r   )rK  rG  )rf  r   )NNT)rg   rs  )r  r  rg   r   )r  r   rg   r   r  )r  rG   rg   r  )r  r  )r^  rW   r_  rW   )rg  rG  )rv   r   )r5  r  )r   r  )r  r  )r  r  )rO  r  r  r  rg   r  )r!   r  )r  r  r  r  rg   r  )r  r  r  r  )
__future__r   r  r  dataclassesenumr   r  r%  r  r  r  r   r]  r  ri  r  rb  r  rT  r  r   r   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   rJ   r   r&   	lru_cacher+   torch._dynamo.device_interfacer,   torch._dynamo.utilsr-   torch.autogradr.   torch.autograd.profiler_utilr/   (torch.fx.passes.graph_transform_observerr0   torch.fx.passes.shape_propr1   torch.utils._sympy.functionsr2   r3   r4   r5   r6   torch.utils._sympy.symbolr7   r8   torch.utils._sympy.value_rangesr9   r:   r  r;   runtime.runtime_utilsr<   r   rl  	getLoggerr\   r   r>   r   	VarRangesr<  rW   	InputTyper  r  rC   rE   rO   FunctionrQ   r   r   r   r   r   r   r
  r  r  r*  r0  rL  r   rZ  re  rl  rp  rr  ru  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r'  rD  rF  rM  rN  r  rT  rV  r  rr  rz  r~  r  r  r  r  r  r  r  r  r  r  r  r%  r)  r9  rG  rI  rj  rm  ro  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  re  r  r  r  r  r  r
  r  rb  r  Enumr  r1  r7  r:  r@  rF  rI  rO  rT  r[  rf  rp  	dataclassrr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r$   r"   <module>r     s   " " " " " " "                   				        				   



                                                 5 4 4 4 4 4 4 4         UO	
 T   D C C C C C 0 0 0 0 0 0 % % % % % % 2 2 2 2 2 2 K K K K K K 0 0 0 0 0 0              8 7 7 7 7 7 7 7 D D D D D D D D       = = = = = = lg%g!!WT]]UZ'(	%,#$	 	{Q'A--+2B2B2BDX2B2B2B5 5 5
L L L L    EN   O O O O Od T      @ @ @@ @ @
+ + + +) ) ) ) A  A  AF+ + + +   &G G G G OS    2  .' ' ' ' ' NT      CI    ) ) ) )' ' ' '# # # #  $ $ $ IcNNWTT"""    8WQU^      "  &) ) )B52 52 52r 9=    &> > >"   $% % %	D 	D 	D 	DU U U	> 	> 	> 	>   .   - - - -( ( (VA A A    !#  " " " "	 	 	 	   (  (  (  ( VA A A A Q7 7 7    *   ~ ~ ~ ~ ~ ~ ~ ~B
 
 
 
 
 
 
 
 $ $ $       @ T	 	 	 	; ; ; ;          16U       8 T@ @ @ TR R R63 3 3l> > >1 1 1 1hC C CC C C C C C C C"     / / /$ $ $N    # # #& . . .F F F F&	 	 	) ) )>& & && & &           T6 6 66 T  Q Q Q0 0 0< < <( ( ( (# # #K K K K  ,* * * * *$) * * *
 
 
DY Y Y Y( ( () ) )# # #J J JL L L
 MQ
 
 
 
, MQ   2T T T T"       0           ! ! ! !% % % %
   
> > >
$ $ $ $NH H H>   L L L L' ' '"  .   *   : : : :9 9 9 9   $    r$   