
    קgì                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dlm
Z
 d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlm Z m!Z!m"Z" d dl#m$c m%c m&Z' d dl(Z)d dl*Z)d dl+m,c m-Z. d d	l/m0Z0 d d
l1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZH d dlImJZJ d dlKmLZLmMZMmNZN d dlOmPZP ddlQmRZRmSZS ddlTmUZUmVZV ddlSmWZWmXZXmYZYmZZZ ddl[m\Z\ ddl]m^Z^m_Z_ ddl`maZa ddlbmcZc ddl,mdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZnmoZompZpmqZq ddlrmsZsmtZtmuZu erddlvmwZw  ed          Zx ed           Zy ed!          Zzee{e f         Z|d"e}d#<    ej~        e          Z ej
        ej        d$%          Ze)js        j        Z	 ee{d&eed&f         d'd(eeee{eed&f         d&d'd(f                           f         Zd"e}d)<   dd.Zd d2Zd!d6Zd!d7Zd"d=Zg d>Zg d?Zd#dAZd$dDZed%d&dJ            Zed%d'dL            Z	 d%d(dOZd)dSZd*dVZd+dWZd+dXZ G dY d(          Zej         G dZ d[                      Zej         G d\ d]e                      Zd,dcZ G dd dee          Zej         G df dge                      Z edh           edi           edj           edk           edl           edm          dnZdoe}dp<   	 d%d-dtZd.dyZej         G dz d{e                      Z G d| d}e          Zej         G d~ de                      Zej         G d de                      Zej         G d de                      Zd/dZd/dZ	 	 	 	 	 d0d1dZ ej
        edE          Zd2dZej         G d de                      Zej         G d de                      Zej         G d de                      Z G d de          Zej         G d de                      Zej         G d de                      Zej         G d de                      Zej         G d de                      Z G d de          Z G d de          Zej         G d de                      Zej         G d de                      Zd3dZd4dZej         G d de                      Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Zej         G d de                      Zej         G d dee                      Z G d de          Z G d de          Z G d de          Z G dĄ de          Zej         G dƄ de                      Z G dȄ de          Z G dʄ de¦          Zee{eeeeee{eeef                  f         Z G d̄ dͦ          Z G d΄ deǦ          Z G dЄ deæ          Z G d҄ de¦          Z G dԄ de¦          Zej         G dք de                      Z G d؄ de̦          Z G dڄ deͦ          Zej         G d܄ de̦                      Zej         G dބ deϦ                      Z G d deЦ          Z G d deϦ          Z G d de          Z G d deϦ          Z G d deϦ          Z G d deϦ          Z G d deϦ          Z G d deצ          Z G d deҦ          Z G d deϦ          Z G d deϦ          Z G d deЦ          Z G d deϦ          Z G d deϦ          Zej         G d d                      Z eJej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        g          Z G d deҦ          Zej         G d  de                      Zej         G d de                      Z G d deϦ          Zej         G d de                      Z G d d&e          Z G d	 d
e          Zej         G d de                      Zd5dZej         G d deϦ                      Zej         G d deϦ                      Z G d de          Zej         G d de                      Z G d de          Z  G d de           Zd6dZdS (7      )annotationsN)nullcontext)partial)AnyCallableClassVarContextManagerDictIterableListLiteralOptionaloverloadSequenceTupleTYPE_CHECKINGTypeVarUnion)	TypeAlias)patch)ExprIntegerSymbol)get_interface_for_device)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)CallMethodKeycompute_unbacked_bindingsDivideByKeyfree_unbacked_symbolsrebind_unbackedresolve_unbacked_bindingsSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureindex_prevent_reordering)extract_free_unbacked_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResult)benchmarker)ReductionHint)argsortcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningget_kernel_metadata
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_subs)opsOpsValueV)GraphLowering_T_U_Vr   _IntLikez  prefix	TensorBoxr   IRNode_NodeOrNodesnode_or_nodesOptional[_NodeOrNodes]returnNonec                *    dfd |            d S )Nnodesr\   r]   r^   c                   | d S t          | t          t          f          r| D ]} |           d S t          | t                    r$|                                 D ]} |           d S t          | t
          j        j        j        t          t          t          t          j        j        j        t           t"          t$          f          sJ dt'          |            d            d S )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])
isinstancelisttupledictvaluestorch	_inductorir
ExpandViewDynamicScalarAssertScalarrX   sympylogicboolalgBooleanr   intEffectfulKerneltype)r`   node_check_tensorboxs     N/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_inductor/ir.pyru   z%validate_ir.<locals>._check_tensorbox   s    =De}-- 	k ' '  &&&&' 't$$ 	k ' '  &&&&' ' O&1! K'/#	  k k kUjjjk k k k k    )r`   r\   r]   r^    )r[   ru   s    @rv   validate_irry      s@    k k k k k k6 ]#####rw   namestrCallable[..., OpsValue]c                B     t           t                    sJ d fd}|S )Nargsobjectkwargsr]   rO   c                 :     t          t                    | i |S N)getattrrN   )r~   r   rz   s     rv   fnzops_wrapper.<locals>.fn   s"    !wsD!!426222rw   )r~   r   r   r   r]   rO   )rb   r{   )rz   r   s   ` rv   ops_wrapperr      s>    dC     3 3 3 3 3 3 Irw   orderSequence[int]&Callable[[Sequence[_T]], Sequence[_T]]c           
         t          t          | t          t          |                                         dfd}|S )NindexSequence[_T]r]   c                     t                     t                    k    sJ  fdt          t                               D             S )Nc                ,    g | ]}|                  S rx   rx   ).0ir   	inv_orders     rv   
<listcomp>z4inverse_reorder.<locals>.reindex.<locals>.<listcomp>   s"    ???il#???rw   lenrange)r   r   s   `rv   reindexz inverse_reorder.<locals>.reindex   sL    5zzS^^++++?????U3u::->->????rw   r   r   r]   r   )re   zipr   r   )r   r   r   s     @rv   inverse_reorderr      sS    Sc%jj 1 12233I@ @ @ @ @ @ Nrw   c                     d fd}|S )Nr   r   r]   c                     t                     t                    k    sJ  fdt          t                               D             S )Nc                ,    g | ]}|                  S rx   rx   )r   r   r   r   s     rv   r   z1same_reorder.<locals>.reindex.<locals>.<listcomp>   s!    ;;;AeAh;;;rw   r   )r   r   s   `rv   r   zsame_reorder.<locals>.reindex   sL    5zzSZZ'''';;;;;s5zz):):;;;;rw   r   rx   )r   r   s   ` rv   same_reorderr      s(    < < < < < < Nrw   reindex1&Callable[[Sequence[_U]], Sequence[_V]]reindex2&Callable[[Sequence[_T]], Sequence[_U]]&Callable[[Sequence[_T]], Sequence[_V]]c                     d fd}|S )Nr   r   r]   Sequence[_V]c                ,      |                     S r   rx   )r   r   r   s    rv   r   z fuse_reindexing.<locals>.reindex   s    x(((rw   )r   r   r]   r   rx   )r   r   r   s   `` rv   fuse_reindexingr      s.    ) ) ) ) ) ) ) Nrw   )   r      r2   )   r   r   r   r2   Sequence[Union[int, Integer]]c                    d t          |           D             fdt          t          |                     D             }|S )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    c                    i | ]\  }}||	S rx   rx   r   idxposs      rv   
<dictcomp>z+stride_order2fill_order.<locals>.<dictcomp>   s    88883c3888rw   c                     g | ]
}|         S rx   rx   r   r   lookups     rv   r   z+stride_order2fill_order.<locals>.<listcomp>   s    777&)777rw   )	enumerater   r   )r   
fill_orderr   s     @rv   stride_order2fill_orderr      sM     98y'7'7888F7777U3u::%6%6777Jrw   seq(Sequence[Union[int, torch.SymInt, Expr]]c                    t          |           }d t          t          |                     D             }t          |          D ]
\  }}|||<   |S )z)
    Convert strides to stride order
    c                    g | ]}d S r   rx   r   _s     rv   r   z$get_stride_order.<locals>.<listcomp>   s    
&
&
&1
&
&
&rw   )r@   r   r   r   )r   
sorted_idxoutr   elems        rv   get_stride_orderr      sY     $CLLJ
&
&eCHHoo
&
&
&CZ((  4D		Jrw   TxLiteral[None]guard_shapeboolc                    d S r   rx   r   r   s     rv   ir_node_to_tensorr         Crw   torch.Tensorc                    d S r   rx   r   s     rv   r   r   
  r   rw   Optional[IRNode]Optional[torch.Tensor]c                   | d S |st           j        j        j        nt          fd|                                 D             }t          |           r&fd|                                 j        D             }nt          
                    |          }|                                 }|                                 }t          |          }t          |          }t           j        j        j                                        5  t!          j        ||||                                          }d d d            n# 1 swxY w Y   |S )Nc                &    g | ]} |          S rx   rx   r   sshape_fns     rv   r   z%ir_node_to_tensor.<locals>.<listcomp>  s!    ...AHHQKK...rw   c                &    g | ]} |          S rx   rx   r   s     rv   r   z%ir_node_to_tensor.<locals>.<listcomp>  s!    ===!((1++===rw   )sizestridedtypedevice)rP   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerD   	shape_envsuppress_guardsrg   empty_stridedzero_)r   r   r   r   r   r   tr   s          @rv   r   r     sm    	yt  7#-.......DQ 9====q||~~'<===22488KKMME\\^^F"4((D$V,,F	
		#	3	3	5	5  fE&
 
 

%'' 	
               Hs   +D::D>D>valueOptional[Sequence[_T]] Optional[Sequence[Optional[_T]]]c                :    t          | t                    r| sd gS | S r   )rb   rc   r   s    rv   may_convert_to_optionalr   +  s+     % u  vLrw   r   Optional[str]c                    t          | dd           x}rt           |                      S t          | t          j                  r| j        S d S )Nr   )r   get_device_typerb   rg   r   rs   )r   r   s     rv   r   r   5  sO    Qd333z -zz||,,,!U\"" v4rw   c                \    t          |           }t          |ot          |                    S r   )r   r   rH   )r   r   s     rv   	is_tritonr   =  s(    AE'&--(((rw   c                (    t          |           dk    S Ncpu)r   r   s    rv   is_cpur   B  s    1&&rw   c                  N   e Zd ZU  e            Zded<   eej        d/d                        Z	d Z
d0d	Zd
 Zd Zd1dZd2dZd Zd Zd Zed             Zd Zd Zd Zd3dZded<   ded<   ded<   ded<   d ed!<   ded"<   ded#<   d$ed%<   d&ed'<   d&ed(<   d)ed*<   d+ed,<   d-ed.<   dS )4rY   zClassVar[OrderedSet[Any]]_current_originsoriginsOrderedSet[torch.fx.Node]c              #     K   t           j        }|| z  t           _        	 d V  |t           _        d S # |t           _        w xY wr   )rY   r   )r   olds     rv   current_originszIRNode.current_originsI  sL       %"%-	*EEE&)F###cF#))))s   1 ?c                    t          | j                  | _        t          j        rt          j                    nd | _        d S r   )r-   r   r   r3   debug_ir_traceback	tracebackformat_stackselfs    rv   __post_init__zIRNode.__post_init__S  s6    !$"7885;5NX/111TXrw   r]   OrderedSet[str]c                @    t          dt          |                      )NzNYI on NotImplementedErrorrs   r  s    rv   get_read_nameszIRNode.get_read_namesW  s    !"8DJJ"8"8999rw   c                    | j         S r   )r   r  s    rv   get_tracebackzIRNode.get_tracebackZ  s
    ~rw   c                    t           r   r  r  s    rv   get_defining_opzIRNode.get_defining_op]      !!rw   Tc                t    dt          | dd           }|r t          |          dk    r|d d          d}|gS )Nzorigins=r    @   =   z...)r   r   )r  shortenr   s      rv   common_reprzIRNode.common_repr`  sQ    ;WT9b99;; 	+s7||b(( "***Gyrw   c                &   ||                      |          z   }t          t          t          |                    }|r<t	          d                    |                    }t          |           j         d| dS t          |           j         d| dS )Nz,
z(
z
)())r  rc   mapr{   indentjoinrs   __name__)r  linesr  	multiline	new_liness        rv   
str_helperzIRNode.str_helperg  s    ((111Se__%% 	5uzz%0011I4jj)<<i<<<<4jj)44E4444rw   c                    | j         S r   r   r  s    rv   r   zIRNode.get_dtypep  s
    zrw   c                B    t          dt          |            d          )Nz#get_layout() is not implemented by !r  r  s    rv   r   zIRNode.get_layouts  s"    !"UT

"U"U"UVVVrw   c                B    t          dt          |            d          )Nz!get_size() is not implemented by r#  r  r  s    rv   r   zIRNode.get_sizev  s"    !"Sd4jj"S"S"STTTrw   c                *    |                                  S r   r   r  s    rv   shapezIRNode.shapey  s    }}rw   c                D    t          |                                           S r   )rL   r   r  s    rv   	get_numelzIRNode.get_numel}  s    T]]__---rw   c                    t           j        j                            t	          j        |                                 d                    S Nr   rP   r   r   is_expr_static_and_truerm   Eqr)  r  s    rv   is_zero_elementszIRNode.is_zero_elements  2    w77AQAQST8U8UVVVrw   c                @    t          dt          |                      )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r  r  s    rv   realizezIRNode.realize  s!      ""@DJJ"@"@AAArw   Nc                @    t          dt          |                      )Nzcodegen_reference NYI on r  r  writers     rv   codegen_referencezIRNode.codegen_reference  s    !"Jd4jj"J"JKKKrw   zCallable[[], torch.device]r   torch.dtyper   zCallable[[], str]get_namezCallable[[], Any]	get_readszCallable[[], int]	num_reads
get_strideget_storage_numelzCallable[[], bool]has_exceeded_max_readsz"Callable[[], Callable[[Any], Any]]make_loadermake_indexerzCallable[[int], None]
mark_reusezCallable[[], None]realize_hintz&Callable[[], OrderedSet[sympy.Symbol]]get_unbacked_symbol_uses)r   r   r]   r  T)TTr   )r  
__module____qualname__r-   r   __annotations__staticmethod
contextlibcontextmanagerr   r  r  r
  r  r  r  r   r   r   propertyr'  r)  r/  r2  r6  rx   rw   rv   rY   rY   F  s        2<*,,>>>>* * *  \*Y Y Y: : : :  " " "   5 5 5 5  W W WU U U   X. . .W W WB B B$L L L L +***        !!!!((((....33334444%%%%$$$$DDDDDDrw   c                  r    e Zd Zd Zd Zd Zd ZddZd Zd	 Z	d
 Z
d ZddZd ZddZddZddZd ZdS )	Operationc                    d | _         d S r   operation_namer  s    rv   r  zOperation.__post_init__  s    -1rw   c                    t           r   r  r  s    rv   r   zOperation.get_device  r  rw   c                4    t          | d          sJ | j        S )Norigin_node)hasattrrS  r  s    rv   get_origin_nodezOperation.get_origin_node  s!    t]+++++rw   c                4    t          | d          sJ | j        S )Nr   )rT  r   r  s    rv   get_originszOperation.get_origins  s     tY'''''|rw   r]   r{   c                "    | j         J | j         S r   rO  r  s    rv   get_operation_namezOperation.get_operation_name  s    "...""rw   c                    dS NFrx   r  s    rv   	is_externzOperation.is_extern      urw   c                    dS r[  rx   r  s    rv   is_no_opzOperation.is_no_op  r]  rw   c                    t           r   r  r  s    rv   get_read_writeszOperation.get_read_writes  r  rw   c                .    ||                                  v S r   )r  )r  rz   s     rv   
is_user_ofzOperation.is_user_of  s    t**,,,,rw   r  c                X    t          d |                                 D                       S )Nc              3  $   K   | ]}|j         V  d S r   rz   )r   deps     rv   	<genexpr>z+Operation.get_read_names.<locals>.<genexpr>  s$      ??s#(??????rw   )r-   r9  r  s    rv   r  zOperation.get_read_names  s)    ??dnn.>.>??????rw   c                4    |                                  j        S r   )ra  readsr  s    rv   r9  zOperation.get_reads  s    ##%%++rw   List[Buffer]c                    t           r   r  r  s    rv   get_outputszOperation.get_outputs  r  rw   OrderedSet[sympy.Symbol]c                    t                      S r   r,   r  s    rv   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs      ||rw   c                    t                      S )a  
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.
        r,   r  s    rv   rB  z"Operation.get_unbacked_symbol_uses  s     ||rw   c                    dS )z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   rx   r  s    rv   get_workspace_sizezOperation.get_workspace_size  s	    
 qrw   Nr]   r{   rC  r]   rk  r]   rn  )r  rE  rF  r  r   rU  rW  rY  r\  r_  ra  rc  r  r9  rm  rp  rB  rt  rx   rw   rv   rM  rM    s       2 2 2" " "       # # # #    " " "- - -@ @ @ @, , ," " " "      "    rw   rM  c                      e Zd ZU ded<   ded<   ded<   ded<   d#dZd$dZ fdZeZd Zd Z	d Z
d Zd Zed             Zeej        fd            Zed%d            Zd Zed             Zd Zd Zd Zd&dZd Zd  Zd! Zd" Z xZS )'Loopstorch.devicer   r7  r   Callable[..., Any]inner_fn
List[Expr]rangesr]   rn  c                z     t                      j        g d | j        D             |                                 R  S )Nc              3  4   K   | ]}t          |          V  d S r   r(   r   es     rv   rh  z1Loops.get_unbacked_symbol_uses.<locals>.<genexpr>  s+      <<1#A&&<<<<<<rw   )r-   unionr~  inner_fn_free_unbacked_symbolsr  s    rv   rB  zLoops.get_unbacked_symbol_uses  sN    !z||! 
<<<<<
//11
 
 
 	
rw   r~  c                                           d j        j         dt           j                                                   g fd|D             z   d j        gz             S )N'c                :    g | ]}| d t          |           S =)r   )r   rz   r  s     rv   r   z!Loops.__str__.<locals>.<listcomp>  s1    AAA4$..t,,..AAArw   origin_node=)r  r   rs   r{   r   inner_fn_strrS  )r  namess   ` rv   __str__zLoops.__str__  s    'DK$'''DJ!!##
 BAAA5AAAB 3d.2234
 
 	
rw   c                V    t                                                       d | _        d S r   superr  rS  r  	__class__s    rv   r  zLoops.__post_init__  '    rw   c                    | j         S r   r   r  s    rv   r   zLoops.get_device  
    {rw   c                    | j         S r   rS  r  s    rv   rU  zLoops.get_origin_node      rw   c                    | j         S r   r  r  s    rv   r   zLoops.get_size  r  rw   c                    | j         S r   r  r  s    rv   get_pointwise_sizezLoops.get_pointwise_size  r  rw   c                    dS r[  rx   r  s    rv   r\  zLoops.is_extern  r]  rw   c                    |                     dd           }|                     dd           } | |i |}||_        t          j        r|pt	          j                    nd |_        t                              |          S )NrS  r   )poprS  r3   r   r   r   rX   create)clsr~   r   rS  tbrs         rv   r  zLoops.create  s|    jj55ZZT**C   #.4.GQB*)(**T 	
 """rw   c                :    fdt          |           D             S )Nc                j    g | ]/\  }}|d k    rt          j        d          nt          |          0S )r2   r   )rm   r   rK   )r   nr   rW   s      rv   r   z Loops._index.<locals>.<listcomp>)  sP     
 
 
1 !"QEM!,J6ST,U,U
 
 
rw   r   )r~  rW   s    `rv   _indexzLoops._index'  s6    
 
 
 
!&))
 
 
 	
rw   r=   c                ~   t          t          j                              }t          j        |          5  t	          j        t          dd          5   | j        |                                   |	                                cd d d            cd d d            S # 1 swxY w Y   d d d            d S # 1 swxY w Y   d S Nallow_indexingT)
r<   rP   MockHandlerset_ops_handlerr   r   r   r|  inner_fn_argsgetvalue)r  	opcounters     rv   inner_fn_opcountzLoops.inner_fn_opcount.  sL    11	y)) 	( 	(5<,d,
 ,
 	( 	( DM4--//00%%''		( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(s5   B20BB2B	B2!B	"B22B69B6c                8    |                      | j                  fS r   )r  r~  r  s    rv   r  zLoops.inner_fn_args7  s    DK((**rw   c                ^    t          j        j        | j        g|                                 R  S r   )rP   KernelFormatterHandlerir_to_stringr|  r  r  s    rv   r  zLoops.inner_fn_str:  s8    '4M
 ..00
 
 
 	
rw   c                P    |                                  j        t          j        k    S r   )r  num_opsr3   realize_opcount_thresholdr  s    rv   has_large_inner_fnzLoops.has_large_inner_fn@  s    $$&&.1QQQrw   c                `    |                      | j                  }t          | j        |          S r   )r  r~  r7   r|  )r  r   s     rv   r  z$Loops.inner_fn_free_unbacked_symbolsC  s'    DK((,T]EBBBrw   c                   t          j        t          dd          5  |                                 rXt	          |                                 |                                 |                                           j        cd d d            S t	          |                                 |                                           j        cd d d            S # 1 swxY w Y   d S r  )	r   r   r   get_reduction_typer9   r>  r   get_reduction_sizerj  r  s    rv   r9  zLoops.get_readsG  s    \.*:DAA 	 	&&(( 
*$$&&MMOO++--  	 	 	 	 	 	 	 	 +$$&&MMOO  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A C	8CCCr  c                N    t          |                                 j                  S r   )r-   r  read_buffersr  s    rv   r  zLoops.get_read_namesU  s    $//11>???rw   c                N    t          |                                 j                  S r   )r   r  r  r  s    rv   r:  zLoops.num_readsX  s    4((**7888rw   c                B    t          dt          |            d          )Nz+get_reduction_size() is not implemented by r#  r  r  s    rv   r  zLoops.get_reduction_size[  (    !G$t**GGG
 
 	
rw   c                B    t          dt          |            d          )Nz+get_reduction_type() is not implemented by r#  r  r  s    rv   r  zLoops.get_reduction_type`  r  rw   c                B    t          dt          |            d          )Nz+constant_to_device() is not implemented by r#  r  r  r   s     rv   constant_to_devicezLoops.constant_to_devicee  r  rw   rw  )r  )r]   r=   rC  ) r  rE  rF  rG  rB  r  r  __repr__r   rU  r   r  r\  classmethodr  rH  r1   INDEXr  rA   r  r  r  r  r  r9  r  r:  r  r  r  __classcell__r  s   @rv   ry  ry    s            
 
 
 
	
 	
 	
 	
          H              # # [# "j 
 
 
 \
 ( ( ( ](+ + + 
 
 ]

R R RC C C  @ @ @ @9 9 9
 
 


 
 


 
 
 
 
 
 
rw   ry  r   Union[Expr, Sequence[Expr]]r   r7  rO   c               ~    |j         r"t          j        t          d          |          S t          j        d|          S )Nnanr   )is_floating_pointrN   constantfloat)r   r   s     rv   nop_loader_fnr  k  s7     &|E%LL%000|Au%%%rw   c                  ,    e Zd Zd Zd Zd Zd Zd ZdS )	Pointwisec                n    |                                  rt          t          | j                  S | j        S Nr!  )r/  r   r  r   r|  r  s    rv   r>  zPointwise.make_loaders  s3      "" 	<=
;;;;}rw   c                    g S r   rx   r  s    rv   r  zPointwise.get_reduction_sizez  s    	rw   c                    d S r   rx   r  s    rv   r  zPointwise.get_reduction_type}      trw   c                z    |                                  }t          j        | ||           ||                    S r   )r>  rN   storer  output_nameindexervarsloaders        rv   store_outputzPointwise.store_output  s7    !!##yggdmmVVD\\BBBrw   c                    |                                  } t          j        t          d|          |          }t	          || j        || j                  S FMove this to a given device. Requires that all reads are to constants.override_device)r>  r   r   ConstantBufferr  r   r~  r  r   r  s      rv   r  zPointwise.constant_to_device  sI    !!##Hn.?HHPPVT[AAArw   N)r  rE  rF  r>  r  r  r  r  rx   rw   rv   r  r  r  sg              C C CB B B B Brw   r  c                  4    e Zd ZU ded<   dZded<   d Zd ZdS )ScatterzCallable[[List[Expr]], Expr]output_indexerNr   scatter_modec                    |                                  } t          j        t          d|          |          }t	          || j        || j        | j        | j                  S r  )	r>  r   r   r  r  r   r~  r  r  r  s      rv   r  zScatter.constant_to_device  s_    !!##Hn.?HHPPJK
 
 	
rw   c                    |                                  }t          j        | ||                     |                     ||          | j                  S )N)mode)r>  rN   r  r  r  r  s        rv   r  zScatter.store_output  s[    !!##yGD''--..F4LL"	
 
 
 	
rw   )r  rE  rF  rG  r  r  r  rx   rw   rv   r  r    sQ         0000"&L&&&&
 
 

 
 
 
 
rw   r  
logical_ormaximumminimummuladdbitwise_xor)anymaxminprodsumxor_sumz"Dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNreduction_typearg_break_ties_leftCallable[..., object]c                      t           v rt                     S  dv r
d fd}|S  dk    rdd
}|S t          d            )NargmaxargminaTuple[object, object]br]   Tuple[OpsValue, OpsValue]c                   | \  }}|\  }}dk    rt          j        ||          }nt          j        ||          }t          j        ||          }t	                    rzt          j        ||          }t          j        ||          }	t          j        |t          j        ||	                    }t          j        |t          j        ||	                    }rt          j        ||          nt          j        ||          }
t          j        |t          j        ||
                    }t          j        |||          t          j        |||          fS )Nr  )	rN   ltgteqr!   ner  logical_andwhere)r  r  a_valuea_indexb_valueb_indexmaskequala_isnanb_isnantier  r   r  s              rv   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s@     !GW GW))vgw//vgw//F7G,,Ee$$ Q&'22&'22~dCF7G,D,DEEucogw.O.OPP '.w(((VGW-- 
 >$s(C(CDDD	$11	$11 rw   welford_combine#Tuple[OpsValue, OpsValue, OpsValue]c                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r   rx   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              rv   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB rw   zunknown reduction_type=)r  r  r  r  r]   r  )r  r  r  r  r]   r  )r  r  )r  r   r  r  r&  s   ```  rv   get_reduction_combine_fnr'    s     ---#N33	/	/	/	 	 	 	 	 	 	 	: ! 	,	,	,	 	 	 	  "! ""LN"L"LMMMrw   strides1Sequence[_IntLike]strides2r   c                |     d t          |          D             } fd|D              fd|D              k    S )zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    c                l    g | ]1\  }}t           j        j                            |d           dk    /|2S r   fallbackr2   rP   r   r   r   r   r   dims      rv   r   z-significant_strides_equal.<locals>.<listcomp>  sI       As7%%cA%66!;; 	
;;;rw   c                d    g | ],}t           j        j                            |                   -S rx   r0  )r   r   r(  s     rv   r   z-significant_strides_equal.<locals>.<listcomp>  0    OOOA **8A;77OOOrw   c                d    g | ],}t           j        j                            |                   -S rx   r0  )r   r   r*  s     rv   r   z-significant_strides_equal.<locals>.<listcomp>  r4  rw   r  )r(  r*  r   non_1_indicess   ``  rv   significant_strides_equalr7    sh     oo  M
 POOOOOOHOOOOOOOHxrw   c                      e Zd ZU ded<   ded<   ded<   ded<   d1d
Zd1dZd2 fdZd Zd Zd Z	d Z
d Zd Zd Ze	 d3d4d            Zed             Zeej        dfd5d             Zed!             Zed"             Zed6d&            Zed'             Zed(             Zed7d.            Zed8d/            Zed9d0            Z xZS ):	Reductionr}  reduction_rangesr{   r  r7  	src_dtyper?   reduction_hintr]   c                :    t                               | d          S )N)r~  r:  r  )r  )ry  r  r  s    rv   r  zReduction.__str__  s#    }}H  
 
 	
rw   c                *    |                                  S r   )r  r  s    rv   r  zReduction.__repr__  s    ||~~rw   rn  c                    t                                                       t                      j        d | j        D              z  S )Nc              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z5Reduction.get_unbacked_symbol_uses.<locals>.<genexpr>  s+      FF1#A&&FFFFFFrw   )r  rB  r-   r  r:  r  s    rv   rB  z"Reduction.get_unbacked_symbol_uses  sE    ww//114FJLL4FFF0EFFF5
 
 	
rw   c                    | j         S r   )r:  r  s    rv   r  zReduction.get_reduction_size  s    $$rw   c                    | j         S r   )r  r  s    rv   r  zReduction.get_reduction_type      ""rw   c           	         t          j        | j        | j        | j        |                     ||                    }t          j        | ||          |          S r   )rN   	reductionr   r;  r  r|  store_reduction)r  r  r  r  reduction_varsr   s         rv   rF  zReduction.store_reduction  sS    JNMM$//	
 
 ";uEEErw   c                T    t          | j                  t          | j                  z   S r   )r   r~  r:  r  s    rv   index_lengthzReduction.index_length$  s"    4;#d&;"<"<<<rw   c                    |                      | j                  }|                      | j        t          j                  }||fS r   )r  r~  r:  r1   RINDEXr  r   rindexs      rv   r  zReduction.inner_fn_args'  s7    DK((T2DK@@vrw   c                    |                      | j                  }|                      | j        t          j                  }t          | j        ||          S r   )r  r~  r:  r1   rK  r7   r|  rL  s      rv   r  z(Reduction.inner_fn_free_unbacked_symbols,  sA    DK((T2DK@@,T]E6JJJrw   c           
         |                                  } t          j        t          d|          |          }t	          || j        || j        | j        | j        | j	        t          j                  S r  )r>  r   r   r  r9  r   r~  r:  r  r;  r?   DEFAULTr  s      rv   r  zReduction.constant_to_device1  sj    !!##Hn.?HHPPJK!N!	
 	
 		
rw   N
input_noder   c	           
        !"#$% d }	t           j        j                            |          }
t           j        j                            t	          |                    }t           j                            | t          j                   o%|dvo!t          j	        o |	|
          o
 |	|          }|st          j        dfS t          t          |                     }|j                            |           }t          |           dk    r|j        $n|j        $d#d!d%#$z  %z  "!$z  %z    !"#$%fd} !"#$%fd	}|dk    r ||
|          }|dk    rt          j        |fS |t'          |t(                    r}t+          |          \  }}|i|gt           j        j                            t	          ||z                       }|
|k    r-t,                              d
|||||           t          j        dfS t          j        |fS |
#k    s|$dz  dz  k    rt          j        dfS t1          | ||||||t          j                  }d } ||          \  }}|r ||          \  }}t3          |          dk    rt          j        dfS t5          j        |                                |                                          \  \  }}}d}d}|D ]}t           j        j                            ||          }t           j        j                            |||                                           }tC          d |D                       }|r|dz  }|dz  }||k    rt          j         ||
|          fS t          j"         ||
|          fS )Nc                D    t          | t          t          j        f          S r   rb   rq   rm   r   r   s    rv   
_is_staticz(Reduction.num_splits.<locals>._is_staticL  s    a#u}!5666rw   r  r2   xpu       i   c                  	 d}d|z  }|dz  k    rdS | dk    rdS | |z  k    r}n| |z  
k     ryz  d|z  z  }||z   dz
  |z  }| ||z  z   dz
  ||z  z  	t          j        |           }t          |	fd          }t          |	z
            dk     rt	          |          }nF	}nCt          j        |           }t          |fd	          }t          |z
            d
k     r|}n}| ||z  z   dz
  ||z  z  S )N   rW  r   r2   i    c                (    t          | z
            S r   absr   tmp_split_sizes    rv   <lambda>zFReduction.num_splits.<locals>.inner_reduction_splits.<locals>.<lambda>      c!n:L6M6M rw   key   c                (    t          | z
            S r   r\  r   max_elements_per_threads    rv   r`  zFReduction.num_splits.<locals>.inner_reduction_splits.<locals>.<lambda>      c!>U:U6V6V rw   2   rm   divisorsr  r]  r  )reduction_numel_hint
numel_hint	num_warpsnum_threads
split_sizetarget_blocksblocks_per_outputrk  closestr_  max_elements_per_devicerg  min_elements_per_devicemin_elements_per_threadnum_smthreads_per_sms            @rv   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splitso  s    Iy.KQZ''q#t++q#j04KKK4

%
25LLL & 7AO L%2Z%?!%C
$R!(;9J+JJQN!$55"7 !>*>??h,M,M,M,MNNNw/00255!$W.E!F!FJJ!/JJ >*>??h,V,V,V,VWWWw!8899B>>!(JJ!8J(:+CCaG[( rw   c                   d}|dz  }d}d}||z   dz
  |z  }| |z  k     r}n| |z  k     rvz  |z  }||z   dz
  |z  }| ||z  z   dz
  ||z  z  t          j        |           }	t          |	fd          }
t          |
z
            dk     rt	          |
          }nF}nCt          j        |           }	t          |	fd	          }
t          |
z
            d
k     r|
}n}| ||z  z   dz
  ||z  z  S )NrZ  rW  r      r2   c                (    t          | z
            S r   r\  r^  s    rv   r`  zFReduction.num_splits.<locals>.outer_reduction_splits.<locals>.<lambda>  ra  rw   rb     c                (    t          | z
            S r   r\  rf  s    rv   r`  zFReduction.num_splits.<locals>.outer_reduction_splits.<locals>.<lambda>  rh  rw   ri  rj  )rl  rm  rn  ro  rvals_per_threadxvals_per_blockxblocksrp  rq  rk  rs  r_  rt  rg  ru  rv  rw  rx  s              @rv   outer_reduction_splitsz4Reduction.num_splits.<locals>.outer_reduction_splits  sv    I#b.K !O!O3a7OKG#j03JJJ4

%
25LLL & 7K H!.!81!< H(+;m+KKaO&6"8 !>*>??h,M,M,M,MNNN~/00255!$W.E!F!FJJ!/JJ >*>??h,V,V,V,VWWWw!8899B>>!(JJ!8J(+;j+HH1L :- rw   zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr   c           	     x   t          d t          |                                 |                                 |                                           |           }|                                }d |j        D             }g }d}t          |j        d           D ]t          fd|D                       r|
                    j                   j        t          j        j        v rNt          j        j        j                 }|j        j        }|                                 |j        j        |k    rd}||fS )	Nr   r   r   rz   layoutdatac                z    g | ]8}t          |t          j                  t          |t          j                  6|9S rx   )rb   rm   r   Numberr   r  s     rv   r   zBReduction.num_splits.<locals>.get_read_indices.<locals>.<listcomp>  sQ       a,, 6@5<5P5P  rw   Fc                    | j         S r   rf  r   s    rv   r`  z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s    af rw   rb  c              3  4   K   | ]}|j         j        v V  d S r   )r   free_symbols)r   r  mds     rv   rh  zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s-      FFaqBH11FFFFFFrw   T)ComputedBufferr   r   r   r   ra  
range_varssortedrj  allappendr   rz   rP   r   name_to_bufferr  r   decide_layout)	r  cbread_writesr  indiceschangedbuforiginal_strider  s	           @rv   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indices  sN   %<<>>++--  
   B ,,..K $/  J
 GG[.4D4DEEE + +FFFF:FFFFF +NN28,,,w!'"888g4RW=*-**;))+++:,??&*GG##rw   r   c              3  "   K   | ]
}|d k    V  dS r2   Nrx   r   r   s     rv   rh  z'Reduction.num_splits.<locals>.<genexpr>  s&      //!A//////rw   )#rP   r   r   symbolic_hintrL   has_featurer5   REDUCE_TO_SINGLE_ELEMENTr3   split_reductionsr?   rP  r   r   Workerget_device_propertiesgpu_subslice_countmulti_processor_countINNERrb   rX   r8   logdebugr9  r   r4   index_vars_squeezer   r  simplify_with_rangesstride_hintskeysr  OUTER)&r   	dst_dtyper;  r|  r~  r:  r  reduction_numelrQ  rU  rl  rm  should_splitdevice_interfacedevice_propertiesry  r  split
new_rangesnew_reduction_rangesextracted_numel_hintr  r  r  r  r   rG  	num_outer	num_innerr   stridesouterrt  rg  ru  rv  rw  rx  s&                                   @@@@@@rv   
num_splitszReduction.num_splits@  s   	7 	7 	7  !w/==oNNW%33M&4I4IJJ
 ##FN,STTT 	'	' '	' 
/00	' 
:&& 	  	, (!++3OF4K4KLL,3II&QQ6""e++&9FF '<F"$"%"9F"B^"S"9F"B^"S"	 "	 "	 "	 "	 "	 "	 "	 "	 "	H!	 !	 !	 !	 !	 !	 !	 !	 !	 !	H ??**+?LLEzz$*E11%*Z*K*K%3V4 40
0 ).B.N+,7+;+I+I%j3G&GHH, ,( ,/CCC		G #,!&0	 	 	  -2B66 &-- $;;;VaZ"_,, (!++!	
 	
	$ 	$ 	$@ ,+A.. 	-))!,,JGQw<<1 (!++&2&EJJLL!..00'
 '
#NV 		 	 	A 55a@@Ag&33A~v{{}}UUG//w/////E Q		Q		y   &(>(>$j) )   !&(>(>$j) )  rw   c                     d D             t          ||          fd|dv rGt          ddt                                                                                   fdfdS  S )z1Convert inner_fn from a reduction to an pointwisec                V    g | ]&}t           j        j                            |          'S rx   )rP   r   r   evaluate_static_shaper   r   s     rv   r   z2Reduction._unroll_reduction_fn.<locals>.<listcomp>%  s8     
 
 
:;AG22155
 
 
rw   c                v     t          j         fdt          j        d D              D                       S )Nc              3  0   K   | ]} |          V  d S r   rx   )r   rM  r   value_fns     rv   rh  z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>.  sC         HUF++     rw   c                ,    g | ]}t          |          S rx   )r   r  s     rv   r   z>Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<listcomp>1  s    ===q%((===rw   )	functoolsreduce	itertoolsproduct)r   
combine_fnr:  r  s   `rv   r   z*Reduction._unroll_reduction_fn.<locals>.fn+  sa    #    "+"3==,<===#    rw   r  r  Nc                    d |D             } | |          t          j         |          t          j                  fS )Nc                6    g | ]}t          j        |          S rx   )rm   expandr   r   s     rv   r   zDReduction._unroll_reduction_fn.<locals>.value_fn.<locals>.<listcomp>?  s     :::a%,q//:::rw   )rN   
index_exprrg   int64)r   rM  flatten_indexr|  s     rv   r  z0Reduction._unroll_reduction_fn.<locals>.value_fn>  sJ    ::6:::HUF++N==#8#8%+FF rw   c                &     |           d         S Nr2   rx   )r   r   s    rv   r`  z0Reduction._unroll_reduction_fn.<locals>.<lambda>E  s    E1 rw   )r'  FixedLayoutr   r   r?  )r|  r:  r  r;  r  r  r   r  s   ``  @@@@rv   _unroll_reduction_fnzReduction._unroll_reduction_fn"  s    
 
?O
 
 
 .niHH
		 		 		 		 		 		 		 111' 112BCC	 
 lnn       .----HIrw   r   rz  r  r|  r{  r~  c
                   t           j        j                            t	                              }
|
dk    rfd} |d           |d           |d           |d          d                                v sJ  d            fd}t                              |||t          |                    S |
dk    r-dv rfd	}nfd
}t                              |||          S t          |
t          j                  rxt           j        j                            |
          t          j        k     rFt	          |          dk    r3t                              ||                     |          |          S |                     ||||
|		  	        \  }}|t"          j        k    r|}|dk    r<|	J t'          |	          \  }}|J |J |                     ||||||
  
        S |dk    r|                     |||||	  	        S t,                              t/          ||||                    S )Nr   c                    t           j        k    rt          |           n$j        rt          |           nt	          |           S r   )rg   r   r  r  rq   )valr  s    rv   py_cnstz!Reduction.create.<locals>.py_cnst]  sD     !EJ.. III !2"sSrw   r2   )r  r  r  r  z* not supported for zero-dimension tensors!c                :    t          j                           S r   rN   r  )r   r  r  rtypes_to_initss    rv   const_fnz"Reduction.create.<locals>.const_fnr  s    |ON$CYOOOrw   r   r   r|  r~  r  c                .    t          j        d          S r+  r  )r   r  s    rv   r   zReduction.create.<locals>.fn  s    <9555rw   c                4    d D             } | |          S )Nc                6    g | ]}t          j        d           S r   rm   r   r   s     rv   r   z0Reduction.create.<locals>.fn.<locals>.<listcomp>  "    &R&R&RAu}Q'7'7&R&R&Rrw   rx   )r   reduction_indexr|  r:  s     rv   r   zReduction.create.<locals>.fn  s)    &R&RAQ&R&R&RO#8E?;;;rw   r  )rP   r   r   simplifyrL   r  r  r  rc   rb   rm   r   r   r3   unroll_reductions_thresholdr  r  r?   rP  r8   !create_multilayer_existing_rangescreate_multilayerrX   r9  )r  r   r  r;  r|  r~  r:  r  r<  rQ  r  r  r  r   hintr  r  r  r  s     ` ` ``          @rv   r  zReduction.createJ  sY    '*33MBR4S4STTa     wqzz"71::

wqzz	 O /"6"6"8"8888 LLL 988P P P P P P P ##!F||	 $    a!5556 6 6 6 6 6
< < < < < < ##FIr6BBB 66	 **?;;01 1f%%**##((.	     nn

 

e ]222!NB;;)))/R0 0,J, )))'33388 $   QYY(( 
 
 
  	 	
 
 	
rw   c                P   | dv rHt          |          rt          d          S t          |          rdS t          j        |          j        S | dv rHt          |          rt          d          S t          |          rdS t          j        |          j        S ddddddd|          S )	N)r  r  z-infr   )r  r  infr2   r   r   r   )r  r  r  r  welford_reducer  )r!   r  r    rg   iinfor  r  r  r   s     rv   default_accumulatorzReduction.default_accumulator  s    ...e$$ .V}}$!%(( .q{5))--...e$$ .U||#!%(( .q{5))-- '(
 
  	rw   c                H    | dk    rdS t                               | |          S )Nr  r   )r9  r  r  s     rv   default_valuezReduction.default_value  s)    ---1,,^UCCCrw   r  rq   rm  c                    | dk    r|S | dk    r"|dk    r|t           j        k    rt           j        S | dk    r"|dk    r|t           j        k    rt           j        S |S )Nr  rX  i      )r?   r  
OUTER_TINY)r  rm  r<  s      rv   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sm     B;;!!C<<J#--.MDW2W2W ++TMMc!!-"555 ++rw   c                   	 t                               |g          	t          j        j                            t          j        |z  d                     	fd}|S )Nr   c                   |\  }| ^ }|z  |z   	fd}
ret          j        t          j        t          j                  t          j        t          j                            }t          j        ||          S  |            S )Nc                 0       g                    S r   rx   )r  r  	new_indexr   s   rv   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s!    vi'););<<<rw   )rN   r
  r  rg   int32masked)r   r  reduction_blockr  r  r  r  
block_sizedefaultr  	need_maskr  r   s        @@rv   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s    !0_*/'Y ?2_DG= = = = = = = =  vN7EK88N?EK@@  z$g666tvvrw   )Viewdynamic_reshape_indexerrP   r   r   r-  rm   r.  )
r  r  r:  r  r  r  r  r  r  r   s
    ` ` `` @@rv   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader
  s     ../?/ARSS(@@H_u,a00
 
 
		 	 	 	 	 	 	 	 	 	" rw   c                    t          d D                       sJ d            t                              |t          |          t          |          z             fd}|S )Nc              3  "   K   | ]
}|d k    V  dS r  rx   r  s     rv   rh  zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>6  s7       
 
AF
 
 
 
 
 
rw   z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t                             }| t                    d          } | t          |          t          |          z                       S r   )r   rd   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       rv   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn=  sm    '(>#o*>*>(>?L$S%9%9%;%;<I6i((51D+E+EEFF  rw   )r  r  r  rd   )	r  r  r  original_reduction_rangesr  r  r  r  r   s	    ``     @rv   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges,  s      
 
+
 
 
 
 
 	I 	IHoHH	I 	I 	I ..%uZ'8'85AU;V;V'V
 
	 	 	 	 	 	 	 rw   r  r  r  r  r  c                @   |t           j        t           j        fvr|nt           j        }t                              |||||||	|          }|                                 |                                fd}t          j	        j
                            t          |                    }|                     |
||          }||dt          |                   k    sJ t                              t	          |||||t          |          d         |	||                    S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                "     g | |          S r   rx   )r   r  intermediate_loaders     rv   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fno  s    &&'A'A'ABBBrw   N)rg   float16bfloat16r  r9  r  r2  r>  rP   r   r   r   rL   r  r   rX   )r  r   r  r;  r  r  r  r  r  r  r  r<  intermediate_dtypeintermediater  rm  r  s                   @rv   create_multilayer_helperz"Reduction.create_multilayer_helperG  sQ   0  ??? I 	
 !'' 	
 	
 	*6688	C 	C 	C 	C 	C W%//o0N0NOO
99:~
 
 *-Cs?/C/C-C"DDDDD3//112	 	
 
 	
rw   c
                    t          |          }
t          |
|dz
  z   |          }|                     ||          }|                     |||
|||          }|                     ||||||g |||g|||	          S )r  r2   )rL   r/   r  r  r  )r  r   r  r;  r|  r~  r:  r  r  r<  r  r  r  r  s                 rv   r  zReduction.create_multilayer  s    $ ((899o;UCC
##NI>>00&
G
 

 ++feL
 
 	
rw   c                    |                      |	|          }|                     ||||||          }|                     ||||||g ||||	d|
          S )r  r  )r  r  r  )r  r   r  r;  r|  r  r  r  r  r  r<  r  r  s                rv   r  z+Reduction.create_multilayer_existing_ranges  s    $ ##NI>>@@% 
 

 ++%+o+
+ 
 
 	
rw   ru  rw  r   )rQ  r   )r   rz  r  r7  r;  r7  r|  r{  r~  r}  r:  r}  r  r{   r<  r?   rQ  r   )r  rq   rm  rq   r<  r?   r]   r?   )r   rz  r  r7  r;  r7  r  r{  r  r}  r  r}  r  r}  r  r}  r  r{   r  rq   r<  r?   )r   rz  r  r7  r;  r7  r|  r{  r~  r}  r:  r}  r  r{   r  rq   r<  r?   )r   rz  r  r7  r;  r7  r|  r{  r  r}  r  r}  r  r}  r  r}  r  r{   r<  r?   )r  rE  rF  rG  r  r  rB  r  r  rF  rI  r  r  r  rH  r  r  r  r?   rP  r  r  r  r  r  r  r  r  r  r  r  s   @rv   r9  r9     sz            !!!!
 
 
 

   
 
 
 
 
 

% % %# # #F F F= = =  
K K K

 
 
  (,_ _ _ _ \_B % % \%N  )6(='+L
 L
 L
 L
 [L
\   \2 D D \D
    \    [B   [4 ;
 ;
 ;
 [;
z $
 $
 $
 [$
L &
 &
 &
 [&
 &
 &
 &
 &
rw   r9  c                       e Zd ZU ded<    fdZd Zeej        fdd            Z	e
d             Zedd            Z xZS )WelfordReductionrq   output_indexc	           
         t                    dk    r	d         }	nfd}	t                                          |||	|||||           || _        d S )Nr2   r   c                @     t           fdD                       S )Nc              3  0   K   | ]} |          V  d S r   rx   )r   r   r   reduction_idxs     rv   rh  z<WelfordReduction.__init__.<locals>.loader.<locals>.<genexpr>  s/      HHRR]33HHHHHHrw   )rd   )r   r#  	inner_fnss   ``rv   r  z)WelfordReduction.__init__.<locals>.loader  s+    HHHHHiHHHHHHrw   )r   r  __init__r  )r  r   r   r$  r~  r:  r  r<  r  r  r  s      `      rv   r%  zWelfordReduction.__init__  s     y>>Qq\FFI I I I I 			
 		
 		
 )rw   c           	         t          j        | j        | j        | j        |                     ||                    }|| j                 }t          j        | ||          |          S r   )rN   rE  r   r;  r  r|  r  rF  )r  r  r  r  rG  rf   r   s          rv   rF  z WelfordReduction.store_reduction  s`    JNMM$//	
 
 t()";uEEErw   r   rz  r   r7  r$  Sequence[Callable[..., Any]]r~  r}  r:  r  r{   r<  r?   c           
        dv sJ t           j        j                            t	                              }fd}	|dk    r& |	d          }
 |	d          } |	d          }|
||fS |dk    rOfddk    r& d                    |	d           |	d          fS t          fdD                       S t                              d         |          \  }}t          j	        k    r||dk    r| 
                    |          S fd	t          d
          D             }|D ]}|                                 |S )N)r  r  c                h      fd}t                               |t                              S )Nc                .    t          j                  S r   r  )r   r   r  s    rv   r|  z8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    |  rw   r  r  r  rc   )r  r|  r   r   r~  s   ` rv   constz&WelfordReduction.create.<locals>.const  sS          ##!F||	 $   rw   r   r2   c                h      fd}t                               |t                              S )Nc                4    d D             } | |          S )Nc                6    g | ]}t          j        d           S r   r  r   s     rv   r   zKWelfordReduction.create.<locals>.copy.<locals>.inner_fn.<locals>.<listcomp>(  r  rw   rx   )r   r  r  r:  s     rv   r|  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fn'  s)    &R&RAQ&R&R&RO!6#777rw   r  r+  )r  r|  r   r   r~  r:  s   ` rv   copyz%WelfordReduction.create.<locals>.copy&  sS    8 8 8 8 8 8 !''!%<<	 (   rw   r  c              3  .   K   | ]} |          V  d S r   rx   )r   r   r0  s     rv   rh  z*WelfordReduction.create.<locals>.<genexpr>5  s+      ::"TT"XX::::::rw   )r  r  c                l    g | ]0}t                               t          |                    1S rx   )rX   r  r  )	r   
output_idxr   r   r$  r~  r<  r:  r  s	     rv   r   z+WelfordReduction.create.<locals>.<listcomp>d  s`     
 
 
   $""	 	 
 
 
rw   r   )rP   r   r   r  rL   rd   r9  r  r?   rP  r  r   r2  )r  r   r   r$  r~  r:  r  r<  r  r,  meanm2weightr  r  resultsr   r0  s    ```````         @rv   r  zWelfordReduction.create  s/    !FFFFF'*33MBR4S4STT	 	 	 	 	 	 	 a588DqBU1XXFV##a
 
 
 
 
 
 
 
 !111tIaL))5588UU1XX==::::	::::::&  **aL)+ + 	
 	
e ]222!N199(( 	 	 	
 
 
 
 
 
 
 
 
 
 $Ahh
 
 
  	 	AIIKKKKrw   c                    dS )Nr  rx   r  s     rv   r  zWelfordReduction.default_valuew  s    yrw   r  c	                J    t                    t          j        j                            t          j        z  d                     }	|	rO|dk    rIfd}
                     ||d         t          |
d          t          |
d          f|d|          S t          dz
  z             t                              |t           fd|D                       g |g||          }|D ]}|                                 d |D             }d	 t          j        j                            t          |                    }                     ||          }t                              |t          fd
|D                       |gd|          S )r  r   r  c                .    t          j        |          S r   r  )r   r#  r   r   s      rv   r  z4WelfordReduction.create_multilayer.<locals>.constant  s    |E5111rw   r   r2   )r   r   r$  r~  r:  r  r  r<  c           	   3  N   K   | ]}                     |d           V   dS )r   )r  N)r  )r   r  r  r  r  r:  r  s     rv   rh  z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s`       
 
  ++$# ,  
 
 
 
 
 
rw   c                6    g | ]}|                                 S rx   )r>  r  s     rv   r   z6WelfordReduction.create_multilayer.<locals>.<listcomp>  s     <<<Q]]__<<<rw   c                      |g | |          S r   rx   )r   r  r  s      rv   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    64E4O4555rw   c              3  ^   K   | ]'}t          |                                           V  (dS ))r  N)r   r>  )r   r   r>  s     rv   rh  z5WelfordReduction.create_multilayer.<locals>.<genexpr>  sM         .q}}GGG     rw   )rL   rP   r   r   r-  rm   r.  r  r   r/   r  r  rd   r2  r   r  )r  r   r   r$  r~  r:  r  r  r<  r  r  intermediatesr   	i_loadersrm  r  r>  r  s   ` `  ` `       @@@rv   r  z"WelfordReduction.create_multilayer{  sW     ((899(@@H_u,a00
 
 
	  	+<<<2 2 2 2 2 ((aLHA...HA...
 !10- )    o;UCC
(// 
 
 
 
 
 
 
 
 (
 
 
 
 
 feL#
 
&  	 	AIIKKKK<<m<<<		6 	6 	6 W%//f0E0EFF
99:~
 
  &&    &     G
 
 	
rw   )r   rz  r   r7  r$  r'  r~  r}  r:  r}  r  r{   r<  r?   )r   rz  r   r7  r$  r'  r~  r}  r:  r}  r  r{   r  rq   r<  r?   )r  rE  rF  rG  r%  rF  r  r?   rP  r  rH  r  r  r  r  s   @rv   r  r    s         ) ) ) ) )<F F F  )6(=s s s s [sj   \ V
 V
 V
 [V
 V
 V
 V
 V
rw   r  c                       e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   d. fdZ fdZd Zd Zd Zd Z	d Z
d Zd Zd Zeej        fddd/d%            Zed0d-            Z xZS )1Scanr}  scan_rangesr   =Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]]r  .Callable[[List[Expr], List[Expr]], List[Expr]]r   r?   r<  rq   r  Tuple[torch.dtype, ...]dtypesTuple[Callable[..., Any], ...]r$  r]   rn  c                    t                                                       t                      j        d | j        D              z   t                      j        d | j        D              z  S )Nc              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>  +      "V"V#8#;#;"V"V"V"V"V"Vrw   c              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>  +      "O"O#8#;#;"O"O"O"O"O"Orw   )r  rB  r-   r  rD  r   r  s    rv   rB  zScan.get_unbacked_symbol_uses  sn    
 GG,,.. jll "V"VTEU"V"V"VWX jll "O"OTY"O"O"OPQ	
rw   c                    t          | j                  t          | j                  z   t          | j                  k    sJ t	                                                       d S r   )r   r~  rD  r   r  r  r  s    rv   r  zScan.__post_init__  R    4;#d&6"7"773ty>>IIIIrw   c                    |                      ||          fd| j        D             }t          j        | j        | j        |          }t          j        | |          || j                           S )Nc                &    g | ]} |          S rx   rx   r   r|  r   s     rv   r   z(Scan.store_reduction.<locals>.<listcomp>  !    ???H((3--???rw   )r   r$  rN   scanrH  r  r  r  )r  r  r  r  	scan_varsrf   resultr   s          @rv   rF  zScan.store_reduction  sl    ll4++???????$+t??yggcllF4;L4MNNNrw   c                    dS )Ncustomrx   r  s    rv   r  zScan.get_reduction_type  s    xrw   c                    | j         S r   )rD  r  s    rv   r  zScan.get_reduction_size  r  rw   c                    | j         S r   r   r  s    rv   r   zScan.get_size  
    yrw   c                    | j         S r   r  r  s    rv   r  zScan.get_pointwise_size  r  rw   c                T    t          | j                  t          | j                  z   S r   )r   r~  rD  r  s    rv   rI  zScan.index_length  "    4;#d&6"7"777rw   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }|fS r   )r  r~  rD  r1   rK  r   r  r   rM  r   s       rv   r  zScan.inner_fn_args  G    DK((T-t{;;ll5&))vrw   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }t          | j        |          S r   )r  r~  rD  r1   rK  r   r7   r|  rb  s       rv   r  z#Scan.inner_fn_free_unbacked_symbols  R    DK((T-t{;;ll5&)),T]C@@@rw   T)can_fallback_to_atenr   rz  'Tuple[Callable[[List[Expr]], Any], ...]axisrf  r   List[Optional[TensorBox]]c               p  	 g d          dz   d                   gt           j                            t          j                  sd gt                    z  S t                    dk    r=t           j                            t          j                  sd gt                    z  S t           j        j        }
|
                    t                              }t                    t                    k    sJ |

                    t          j        |d                    r+fdt          t                              D             S |                     d         d         |          \  }t          |dk    rGt           j        j        d u ot                    dk    }|s|rd gt                    z  S d}nt&          fd	fdt          t                              D             }|D ]}|                                 |S )Nr2   c                d    g | ],}t                               |         |                    -S r  r  r  r   r  r   rH  r$  r   s     rv   r   zScan.create.<locals>.<listcomp>4  V        !   ! .&|4	 !    rw   r   )r   r   r|  rh  pointwise_rangesrD  r  
scan_numelc                    t          |          t                    k    sJ t          |           t                    k    sJ g | d          || d          S r   r   )r   
scan_indexrh  rp  rD  s     rv   r   zScan.create.<locals>.reindexT  g    z??c+&6&66666u::%5!6!66666>U5D5\>J>tuu>>rw   c                    g | ];}t                                d|         |         
	|d           <S ))r   r   rH  r|  r$  r   r~  rD  r  r   r<  r  rx   )rX   r  )r   r  r  r   rH  r$  r   rp  r<  r   rD  	scan_typer   s     rv   r   zScan.create.<locals>.<listcomp>Y  s     
 
 
$ # 	 ! .!&|4'+ +)##1!-    
 
 
rw   )rP   r   r  r5   SCANr   TUPLE_REDUCTIONr   r  rL   r-  rm   Ler   r  rC  rg   versionhip	SplitScanr2  )r  r   rH  r$  r   rh  r  r<  rf  r   r   rq  r  supports_splitr7  rW  rp  r   rD  rw  s    ``````` `      @@@@rv   r  zScan.create  s    =T%4%[<4q

+;<Dzlw""6>+>?? 	(6CKK''v;;??17#6#6N2$
 $
? 6CKK''7#&&}['A'ABB
6{{c)nn,,,, ++EHZ,C,CDD 		       %*#f++$6$6    &)^^)q\-#!! &4 	&
 	&
"
 	>>"].$6K3v;;!;KN! &' # 6CKK//!"JJ%		? 	? 	? 	? 	? 	? 	?

 
 
 
 
 
 
 
 
 
 
 
 
 
$ !&c&kk 2 2%
 
 
*  	 	FNNrw   r   r7  r|  Callable[[List[Expr]], Any]rp  rq  r   c	           
     V    fd}	t                               ||||	||d|          S )Nc                F     g | d          || d                    S r   rx   )r   r#  rh  r|  s     rv   r  z#Scan.num_splits.<locals>.wrapper_fn  s3    8Fc%4%jF=F3tuu:FGGGrw   r  )r   r  r;  r|  r~  r:  r  r  )r9  r  )
r  r   r   r|  rh  rp  rD  r  rq  r  s
      ``     rv   r  zScan.num_splitss  s_    	H 	H 	H 	H 	H 	H ###( & $ 	
 	
 		
rw   rw  )r   rz  rH  rG  r$  rg  r   r}  rh  rq   r  rE  r<  r?   rf  r   r]   ri  )r   rz  r   r7  r|  r  rh  rq   rp  r}  rD  r}  r  rE  rq  r   )r  rE  rF  rG  rB  r  rF  r  r  r   r  rI  r  r  r  r?   rP  r  r  r  r  s   @rv   rC  rC    s        MMMM;;;;!!!!####----
 
 
 
 
 
         O O O           8 8 8  A A A  )6(=] &*] ] ] ] ] []~ 
 
 
 [
 
 
 
 
rw   rC  c                      e Zd ZdS )r}  N)r  rE  rF  rx   rw   rv   r}  r}    s        Drw   r}  c                       e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   d# fdZ fdZd Zd Zd Zd Z	d Z
d Zd Zd Zeej        fd$d"            Z xZS )%Sortr}  sort_rangesr   rF  r   r?   r<  rq   r  rG  rH  rI  r$  r   stable
descendingr]   rn  c                    t                                                       t                      j        d | j        D              z   t                      j        d | j        D              z  S )Nc              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  rL  rw   c              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>  rN  rw   )r  rB  r-   r  r  r   r  s    rv   rB  zSort.get_unbacked_symbol_uses  sl    GG,,.. jll "V"VTEU"V"V"VWX jll "O"OTY"O"O"OPQ	
rw   c                    t          | j                  t          | j                  z   t          | j                  k    sJ t	                                                       d S r   )r   r~  r  r   r  r  r  s    rv   r  zSort.__post_init__  rP  rw   c                    |                      ||          fd| j        D             }t          j        | j        || j        | j                  }t          j        | |          || j                           S )Nc                &    g | ]} |          S rx   rx   rS  s     rv   r   z(Sort.store_reduction.<locals>.<listcomp>  rT  rw   )	r   r$  rN   sortrH  r  r  r  r  )r  r  r  r  	sort_varsrf   rW  r   s          @rv   rF  zSort.store_reduction  sp    ll4++???????$+vt{DOLLyggcllF4;L4MNNNrw   c                    dS )Nr  rx   r  s    rv   r  zSort.get_reduction_type  s    vrw   c                    | j         S r   )r  r  s    rv   r  zSort.get_reduction_size  r  rw   c                    | j         S r   r\  r  s    rv   r   zSort.get_size  r]  rw   c                    | j         S r   r  r  s    rv   r  zSort.get_pointwise_size  r  rw   c                T    t          | j                  t          | j                  z   S r   )r   r~  r  r  s    rv   rI  zSort.index_length  r`  rw   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }|fS r   )r  r~  r  r1   rK  r   rb  s       rv   r  zSort.inner_fn_args  rc  rw   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }t          | j        |          S r   )r  r~  r  r1   rK  r   r7   r|  rb  s       rv   r  z#Sort.inner_fn_free_unbacked_symbols  re  rw   r   rz  rg  rh  ri  c	                p  	 g d          dz   d                   gt           j                            t          j                  sd gt                    z  S t           j        j        }
|
                    t                              }d}t          j
        j        o'|
                    t          j        ||                    }|sd gt                    z  S t                    t                    k    sJ |
                    t          j        |d                    r+fdt          t                              D             S fd	fdt          t                              D             }|D ]}|                                 |S )Nr2   rX  c                d    g | ],}t                               |         |                    -S rl  rm  rn  s     rv   r   zSort.create.<locals>.<listcomp>  ro  rw   c                    t          |          t                    k    sJ t          |           t                    k    sJ g | d          || d          S r   rs  )r   
sort_indexrh  rp  r  s     rv   r   zSort.create.<locals>.reindex  ru  rw   c                    g | ]@}t                               t          d|         |         
	|d           AS ))r   r   rH  r|  r$  r   r~  r  r   r<  r  r  r  rx   )rX   r  r  )r   r  r  r   rH  r$  r   rp  r<  r   r   r  r  s     rv   r   zSort.create.<locals>.<listcomp>  s     
 
 
& %  ! .!&|4'+ +##1!-!)    
 
 
rw   )rP   r   r  r5   SORTr   r   r  rL   r3   tritonpersistent_reductionsr-  rm   rz  r   r2  )r  r   rH  r$  r   rh  r  r  r<  r   r   
sort_numel
max_rblockis_persistent_kernelr7  rW  rp  r   r  s    `````````      @@@rv   r  zSort.create  s*    =T%4%[<4q

+;<Dzlw""6>+>?? 	(6CKK''7#&&}['A'ABB
 
M/ S00*j1Q1QRR 	 $ 	(6CKK''6{{c)nn,,,, ++EHZ,C,CDD 		       %*#f++$6$6   	? 	? 	? 	? 	? 	? 	?

 
 
 
 
 
 
 
 
 
 
 
 
 
& !&c&kk 2 2'
 
 
,  	 	FNNrw   rw  )r   rz  rH  rG  r$  rg  r   r}  rh  rq   r  r   r  r   r<  r?   r]   ri  )r  rE  rF  rG  rB  r  rF  r  r  r   r  rI  r  r  r  r?   rP  r  r  r  s   @rv   r  r    sw         ;;;;!!!!####----LLL
 
 
 
 
 
         O O O           8 8 8  A A A  )6(=L L L L [L L L L Lrw   r  c                L    	 t          | d           dS # t          $ r Y dS w xY w)NFfreezeT)as_storage_and_layoutr  r   s    rv   r   r   "  sA    a....t   uus    
##c                    	 t          | d          \  }}|                                r|                                 |                                S # t          $ r Y dS w xY wNFr  )r  should_pad_stridespad_stridesis_contiguousr  )r   bufferr  s      rv    is_contiguous_storage_and_layoutr  *  sy    .q??? $$&& 	!   ##%%%   uus   AA 
A A Fr  want_contiguousstride_order'Optional[Sequence[Union[int, Integer]]]allow_paddingexact_stridesTuple[StorageBox, Layout]c                   t          | t                    rt          | j        |||||          S t          | t                    rt          | j        t
                    r|r|r:| j                                         | j        j                                        sJ nW|| j        	                    ||           n8|| j        
                    ||           n| j                                         | | j        j        fS t          | t                    r"t          | j        |          \  }}|| j        fS t          )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    r  r  r  r  r  Nr  r  )rb   rX   r  r  
StorageBoxBufferfreeze_layoutr  r  freeze_layout_with_stride_order freeze_layout_with_exact_stridesr  ReinterpretViewr  )r   r  r  r  r  r  r  r   s           rv   r  r  6  sy    !Y 
$F+%''
 
 
 	
 !Z    Z%?%?   	' '$$&&&v}22444444)66  7     *77! 8     $$&&&!&-!_%%   *F
 
 
	 qx
rw   )r  c                x    	 t          | d          \  }}|                    |          S # t          $ r Y dS w xY wr  )r  is_stride_orderedr  )r   r  r  r  s       rv   "is_stride_order_storage_and_layoutr  m  sS    .q???''555   uus   (+ 
99c                      e Zd ZU ded<   d Zd Zd Zd Zed             Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd ZddZd Zd Zd ZdS )BaseViewrY   r  c                4    | j                                         S r   r  rB  r  s    rv   rB  z!BaseView.get_unbacked_symbol_uses{      y11333rw   c                &    t          d|            )Nzmake_reindexer NYI on r  r  s    rv   make_reindexerzBaseView.make_reindexer~  s    !"A4"A"ABBBrw   c                p    | j                                         |                                 fd}|S )Nc                ,      |                     S r   rx   r   innerr   s    rv   r  z&BaseView.make_indexer.<locals>.indexer      5&&&rw   )r  r?  r  )r  r  r  r   s     @@rv   r?  zBaseView.make_indexer  sM    	&&((%%''	' 	' 	' 	' 	' 	' rw   c                p    | j                                         |                                 fd}|S )Nc                ,      |                     S r   rx   r  s    rv   r  z$BaseView.make_loader.<locals>.loader  r  rw   )r  r>  r  )r  r  r  r   s     @@rv   r>  zBaseView.make_loader  sM    	%%''%%''	' 	' 	' 	' 	' 	' rw   c                    | j         j        S r   r  r   r  s    rv   r   zBaseView.dtype      yrw   c                4    | j                                         S r   r  r   r  s    rv   r   zBaseView.get_layout      y##%%%rw   c                4    | j                                         S r   )r  r   r  s    rv   r   zBaseView.get_device  r  rw   c                    d S r   rx   r  s    rv   rU  zBaseView.get_origin_node  r  rw   c                4    | j                                         S r   r  r8  r  s    rv   r8  zBaseView.get_name      y!!###rw   c                *    |                                  S r   r&  r  s    rv   r  zBaseView.get_pointwise_size      }}rw   c                6    | j                             |          S r   )r  r@  r  userss     rv   r@  zBaseView.mark_reuse  s    y##E***rw   c                4    | j                                         S r   )r  r=  r  s    rv   r=  zBaseView.has_exceeded_max_reads  s    y//111rw   c                4    | j                                         S r   r  r2  r  s    rv   r2  zBaseView.realize      y  """rw   c                4    | j                                         S r   )r  rA  r  s    rv   rA  zBaseView.realize_hint  s    y%%'''rw   c                4    | j                                         S r   )r  r<  r  s    rv   r<  zBaseView.get_storage_numel  s    y**,,,rw   c                4    | j                                         S r   )r  r\  r  s    rv   r\  zBaseView.is_extern      y""$$$rw   c                4    | j                                         S r   )r  is_module_bufferr  s    rv   r  zBaseView.is_module_buffer      y))+++rw   r]   r  c                4    | j                                         S r   r  r  r  s    rv   r  zBaseView.get_read_names      y'')))rw   c                    t          j        t          dd          5  t          |                                 |                                           j        cd d d            S # 1 swxY w Y   d S r  )r   r   r   r9   r>  r   rj  r  s    rv   r9  zBaseView.get_reads  s    \.*:DAA 	 	&  ""  		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   9A""A&)A&c                l    | }t          |t                    r|j        }t          |t                    |S r   )rb   r  r  )r  r   s     rv   unwrap_viewzBaseView.unwrap_view  s9    H%% 	A H%% 	rw   c                    |                                  } t          j        t          d|          |          }t	          ||                                 ||                                           S r  )r>  r   r   r  r  r   r   r  s      rv   r  zBaseView.constant_to_device  sW    !!##Hn.?HHPP!1!164==??KKKrw   NrC  )r  rE  rF  rG  rB  r  r?  r>  rK  r   r   r   rU  r8  r  r@  r=  r2  rA  r<  r\  r  r  r9  r  r  rx   rw   rv   r  r  w  ss        LLL4 4 4C C C       X& & && & &  $ $ $  + + +2 2 2# # #( ( (- - -% % %, , ,* * * *    L L L L Lrw   r  c                  R    e Zd ZU ded<   ed             Zed             Zd Zd Z	dS )rj   r}  r   c                \   t           j        j        }t          t	          t
          j        |                    }|                                 }dgt          |          t          |          z
  z  t          |          z   }t          |          t          |          k    sJ t          t          |                    D ]m}||         dk    r||         J ||         ||<   $||         ||         dk    r9|
                    ||         ||         z
  d          dk    s
J d            n|S )zReplace `-1` with correct sizesNr  r2   r   r.  zKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rP   r   r   rc   r  rm   r  r   r   r   r   )r   new_sizer   old_sizer   s        rv   _normalize_sizezExpandView._normalize_size  s)    7#EL(3344::<<6S]]S]]:;d8nnL8}}H----s8}}%% 	a 	aA{b  {...&qk!$q(8(8 &&x{Xa['@1&MMQRRRR` SRRRrw   c                4   |                      ||          }t          |          rt          |          \  }}t          |          t          |j                  z
  }|dk    sJ t          j        d          g|z  }t          |j        |j                  D ]4\  }}|	                    |dk    r|nt          j        d                     5t          |j        |j        t          |          ||j                  }	t          ||	          S t!          ||          S Nr   r2   )r  r   r  r   r   rm   r   r   r   r  r  r   r   rc   offsetr  rj   )
r  r   r  storage
old_layoutskip
new_strider   r   
new_layouts
             rv   r  zExpandView.create  s   &&q(33 ## 	8"7":":GZx==3z#7#77D19999-**+d2J #J$5z G G M M!!DAII&&5=;K;KLLLL$! X! J #7J777!X&&&rw   c                    | j         S r   r\  r  s    rv   r   zExpandView.get_size	  r]  rw   c                    |                                  }| j                                         t          |          t                    z
  fd}|S )Nc                    t          | d                    } t          |           t                    k    sJ t          t                              D ]%}|         dk    rt          j        d          | |<   &| S Nr2   r   )rc   r   r   rm   r   )r   r   actualr  s     rv   r   z*ExpandView.make_reindexer.<locals>.reindex	  sx    tuu&&Eu::V,,,,3v;;'' 0 0!9>>$}Q//E!HLrw   )r   r  r   )r  targetr   r   r  s      @@rv   r  zExpandView.make_reindexer	  s_    ##%%6{{S[[(	 	 	 	 	 	 rw   N)
r  rE  rF  rG  rH  r  r  r  r   r  rx   rw   rv   rj   rj     st           \0 ' ' ['*      rw   rj   c                  R    e Zd ZU ded<   ed             Zed             Zd Zd ZdS )PermuteViewr}  dimsc                   |                      |          }t          |          t          t          t          |                              k    sJ t	          |          r\t          |          \  }t          j        j        fd|D             fd|D             j	                  }t          ||          S t          ||          S )Nc                *    g | ]}j         |         S rx   r\  r   r   r  s     rv   r   z&PermuteView.create.<locals>.<listcomp>'	  s     222#222rw   c                *    g | ]}j         |         S rx   r   r  s     rv   r   z&PermuteView.create.<locals>.<listcomp>(	  s!    444!"1%444rw   )_map_neg_dimsr-   r   r   r   r  r  r   r   r  r  r  )r  r   r  r  r  r  s        @rv   r  zPermuteView.create	  s      &&$:eCII.>.>#?#????? ## 		8"7":":GZ$! 2222T2224444t444! J #7J7771d###rw   c                     fdD             S )Nc                D    g | ]}|d k    r|nt                    |z   S r   rs  )r   r2  r  s     rv   r   z-PermuteView._map_neg_dims.<locals>.<listcomp>1	  s/    EEEsaxxSYY_EEErw   rx   )r  r  s    `rv   r
  zPermuteView._map_neg_dims/	  s    EEEEEEEErw   c                   t          |                     | j                            t          t          t	          | j                                      k    sJ | j                                        fd| j        D             S )Nc                     g | ]
}|         S rx   rx   )r   r   r   s     rv   r   z(PermuteView.get_size.<locals>.<listcomp>8	  s    +++AQ+++rw   )r-   r
  r  r   r   r  r   )r  r   s    @rv   r   zPermuteView.get_size3	  s    $,,TY7788J#di..!!=
 =
 
 
 
 
 y!!##++++++++rw   c                *   d t          | j                  D             fdt          t          | j                            D             t	                    t	          t          t          | j                                      k    sJ fd}|S )Nc                    i | ]\  }}||	S rx   rx   )r   r   js      rv   r   z.PermuteView.make_reindexer.<locals>.<dictcomp>;	  s    5551q!555rw   c                     g | ]
}|         S rx   rx   )r   r   invs     rv   r   z.PermuteView.make_reindexer.<locals>.<listcomp><	  s    555!s1v555rw   c                "      fdD             S )Nc                     g | ]
}|         S rx   rx   )r   r   r   s     rv   r   z?PermuteView.make_reindexer.<locals>.reindex.<locals>.<listcomp>@	  s    ***E!H***rw   rx   )r   r  s   `rv   r   z+PermuteView.make_reindexer.<locals>.reindex?	  s    ****c****rw   )r   r  r   r   r-   )r  r   r  s     @rv   r  zPermuteView.make_reindexer:	  s    55	$) 4 45555555uS^^44555#*U3ty>>-B-B"C"CCCCC	+ 	+ 	+ 	+ 	+ rw   N)	r  rE  rF  rG  r  r  r
  r   r  rx   rw   rv   r  r  	  sw         $ $ [$" F F [F, , ,    rw   r  c                  H    e Zd Zeddd            Zedd            Zd ZdS )	SqueezeViewN)r2  c                  t          |          r-t          |          \  }}g }g }?t          t                    s
J d            dk    rt	          |j                  k     sJ t          t          |j        |j                            D ]|\  }\  }}	1|dk    r*|	                    |           |	                    |	           ;|k    r+|	                    |           |	                    |	           l|dk    s
J d            }t          |j        |j        |||j                  }
t          ||
          S 7t                              |d |                                D                       S |                                         dk    sJ t                              |fdt          |                                          D                       S )Nzexpected integer dim argumentr   r2   zexpected squeezed size to be 1c                    g | ]
}|d k    |S r2   rx   r  s     rv   r   z&SqueezeView.create.<locals>.<listcomp>g	  s    "E"E"Ea1ff1fffrw   c                &    g | ]\  }}|k    |S rx   rx   )r   r   r   r2  s      rv   r   z&SqueezeView.create.<locals>.<listcomp>j	  s#    "U"U"UAAQTHH1HHHrw   )r   r  rb   rq   r   r   r   r   r   r  r  r   r   r  r  r  r  r   )r  r   r2  r  r  r  r  r   r   r   r  s     `        rv   r  zSqueezeView.createF	  s    ## 	8"7":":GZHJ!#s++LL-LLLLCxxC#jo*>*>$>$>$>$>%.s:?JDU/V/V%W%W 
K 
K!>D&;qyy ---"))&111Cxx ---"))&1111#qyyy*Jyyyy$! ! J #7J777;;;q"E"Eajjll"E"E"EFFF::<<$))));;q"U"U"U"U1::<<1H1H"U"U"UVVVrw   r   Tuple[sympy.Expr, ...]c                    d | D             }d t          |           D             t          |           dfd}||fS )	Nc                    g | ]
}|d k    |S r  rx   r  s     rv   r   z(SqueezeView.squeezer.<locals>.<listcomp>n	  s    ...!qAvvAvvvrw   c                $    g | ]\  }}|d k    |S r  rx   )r   r   r   s      rv   r   z(SqueezeView.squeezer.<locals>.<listcomp>o	  s!    ;;;AAFF1FFFrw   r   List[sympy.Expr]r]   r  c                    t          |           t                    k    sJ |  d             t          j        d          gz  }t          |           D ]
\  }}|||<   t	          |          S )N r   )r   rm   r   r   rd   )r   r  r   r   lengthnot_ones       rv   r   z%SqueezeView.squeezer.<locals>.reindexr	  s    u::W---%/C/C'/C/C---q))*V3Igu-- # #Q!"	####rw   )r   r   r]   r  )r   r   )r   r  r   r#  r$  s      @@rv   squeezerzSqueezeView.squeezerl	  sk    ..t...;;4;;;T	$ 	$ 	$ 	$ 	$ 	$ 	$   rw   c                     t          d          )Nzuse SqueezeView.create())AssertionError)r  r  s     rv   r%  zSqueezeView.__init__{	  s    7888rw   )r   r  )r  rE  rF  r  r  rH  r%  r%  rx   rw   rv   r  r  E	  so        " #W #W #W #W [#WJ ! ! ! \!9 9 9 9 9rw   r  c                  X    e Zd ZU ded<   ded<   d Zd Zdd	ZeZed
             Z	d Z
dS )GenericViewr}  r   r{  r   c                    | j         S r   )r   r  s    rv   r  zGenericView.make_reindexer	  
    |rw   c                    d t          t          | j                            D             }t          |                     |                    }dd                    t          t          |                     d| S )Nc                B    g | ]}t          t          j        |          S rx   )rK   r1   r  )r   r  s     rv   r   z+GenericView.reindex_str.<locals>.<listcomp>	  s2     
 
 
>?*4:q99
 
 
rw   zlambda , : )r   r   r   rc   r   r  r  r{   )r  	index_old	index_news      rv   reindex_strzGenericView.reindex_str	  sw    
 
CHTYCXCX
 
 
	 i0011	F3sI#6#677FF9FFFrw   r]   r{   c                v    |                      | j        d| j         d|                                  g          S )Nsize=zreindex=)r  r  r   r2  r  s    rv   r  zGenericView.__str__	  sD    Y+	++-L8H8H8J8J-L-LM
 
 	
rw   c                6     | |t          |          |          S r   )rc   )r  r   r  r   s       rv   r  zGenericView.create	  s    s1d8nng...rw   c                    | j         S r   r\  r  s    rv   r   zGenericView.get_size	  r]  rw   Nru  )r  rE  rF  rG  r  r2  r  r  r  r  r   rx   rw   rv   r)  r)  	  s           G G G
 
 
 

 H/ / [/    rw   r)  c                  |    e Zd Zed             Zed             Zed             Zed             Zed             Z	dS )r  c                    t          j        |           } t          j        |          }t          j        j        j        j        } |t          j        | d                    r| |z   } | S r+  )rm   r  rP   r   r   r   evaluate_exprLt)r   r   r9  s      rv   handle_negative_indexzView.handle_negative_index	  s\    l3|D!!(2@=#q))** 	*C
rw   c                B  	 t          |t          t          f          sJ |                     |                                |          \  	}t
          j        j                            	|          r|S d}t          t          	                    dk    s t          t          |                    dk    rd}d|v r	fd} | |t          |          |          S t          |          s|r|r)t          |          st                              |          }t          |          \  }}t          |j        |j        |t$                              |          |j                  }t+          ||          S |                     	|          } | |t          |          |          S )NFr   Tc                D    t          dgt                    z            S r+  )rd   r   )r   r  s    rv   fake_reindexz!View.create.<locals>.fake_reindex	  s    aS3x==0111rw   )rb   rd   rc   resolve_negative_sizer   rP   r   r   statically_known_list_equalsr   r(   r  ExternKernelrealize_input as_contiguous_storage_and_layoutr  r   r   r   r   r  r  r  )
r  r   r  unbacked_symbols_in_sizesr>  r  r  r  r   r  s
            @rv   r  zView.create	  s   (UDM22222 66qzz||XNN( 7888LL 	H$)!%h//00144(2233a77(,%==2 2 2 2 2 3q$x..,777-a00 	84M 	8( 22RST2U2U 2 !..q11"B1"E"EGZ$! 11(;;! J #7J777--hAAs1d8nng...rw   c                   d |D             }d | D             } t          |          }t          t          |                    D ]T}||         dk    rFt          j        d          ||<   t          t          |           t          |                    ||<    nUt          j        j	        
                    t          |           t          |                     | |fS )Nc                V    g | ]&}t           j        j                            |          'S rx   rP   r   r   r  r  s     rv   r   z.View.resolve_negative_size.<locals>.<listcomp>	  +    CCCQAG$--a00CCCrw   c                V    g | ]&}t           j        j                            |          'S rx   rG  r  s     rv   r   z.View.resolve_negative_size.<locals>.<listcomp>	  rH  rw   r  r2   )rc   r   r   rm   r   r.   rL   rP   r   r   guard_equals)r  r  r   s      rv   r?  zView.resolve_negative_size	  s    CC(CCCCC(CCC>>s8}}%% 	 	A{b  #mA..&}X'>'>h@W@WXX !
 	
%%mH&=&=}X?V?VWWW!!rw   c                    	 |                      ||          }nc# t          t          f$ rO t          |          g}|                      ||          }|                      ||          }t	          ||          }Y nw xY w|S r   )_dynamic_reshape_indexerr'  
IndexErrorrL   r   )r  r  r  r   flatr   r   s          rv   r  zView.dynamic_reshape_indexer	  s    	:228XFFGG
+ 	: 	: 	:!(++,D33HdCCH33D(CCH%h99GGG	: s    AA98A9c                   t           j        j        j        }d t	          t          |                    D             t          t          |                    }t          |           }g |r|r|                                }|                                \  }}|dk    r@	                    t          j        d                     |	                    ||f           n|dk    r|	                    |           n ||           ||          k    r<	                    |           t           j        j                            ||           n ||           ||          k     r ||           ||          k     r<|                                \  }}	||z  |z   }||	z  } ||           ||          k     <	                    |           t           j        j                            ||           n ||           ||          k    rt          j        d          }
|}	                    t          ||
|                     |
|z  }
 ||           ||          k    rZ|                                }	                    t          ||
|                     |
|z  }
||z  } ||           ||          k    Zt           j        j                            ||           nt          |r||rb|                                }t           j        j                            |d           	                    t          j        d                     |b|r>|                                \  }}t           j        j                            |d           |>                                 t                    t          |           k    sJ fd}|S )zG
        Perform a reshape entirely by modifying indexing math
        c                B    g | ]}t          t          j        |          S rx   )rK   r1   VIEWr  s     rv   r   z1View._dynamic_reshape_indexer.<locals>.<listcomp>	  s2     
 
 
=>*49a88
 
 
rw   r2   r   c                   t          |           t                    k    s&J t          |           t                    f            t          t          |                     t          fdD                       S )Nc              3  8   K   | ]}t          |          V  d S r   )rM   )r   r   replacementss     rv   rh  zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>,
  s-      HHA|44HHHHHHrw   )r   re   r   rd   )r   rT  r  	view_exprs    @rv   r   z.View._dynamic_reshape_indexer.<locals>.reindex)
  st    u::T***SZZT,C***D% 0 011LHHHHiHHHHHHrw   )rP   r   r   r   r   r   rc   r   r  r  rm   r   rJ  r0   r'  reverse)r  r  r   	stack_new	stack_oldsize_oldvarsize_newvar2	size_new2divisormodulusr   r  rU  s                @@rv   rL  zView._dynamic_reshape_indexer	  s   
 G$.	
 
BGHBVBV
 
 
 T8,,--	NN		 	%I 	% }}H%MMOOMC1}}  q!1!1222  #x1111Q  ****8$$		((;(;;;  %%% --hAAAA8$$yy':':::i))IIh,?,???&/mmooOD)/C/C')3H  i))IIh,?,???   %%% --hAAAA8$$yy':':::-**"  gw!G!GHHH!G+i))IIh,?,???'mmooG$$_S'7%K%KLLL%/G''1H	  i))IIh,?,???
  --hAAAA$$=  	%I 	%@  	/ }}HG))(A666U]1--...  	/
  	7%MMOOMCG))(A666  	7 	9~~X....	I 	I 	I 	I 	I 	I
 rw   N)
r  rE  rF  rH  r;  r  r  r?  r  rL  rx   rw   rv   r  r  	  s          \ '/ '/ ['/R " " \" 	 	 [	 @ @ \@ @ @rw   r  c                       e Zd ZU dZded<    fdZddZeZd Zd	 Z	d
 Z
ed             Zd Zd Zd Zd Zd Zd ZddZddZd Z xZS )r  z*Pretend our storage has a different layoutLayoutr  c                    t                                                       t          | j        t                    r | j                                        | _        d S d S r   )r  r  rb   r  r  r  r  s    rv   r  zReinterpretView.__post_init__7
  sP    di** 	0	--//DIII	0 	0rw   r]   r{   c                D    |                      | j        | j        g          S r   )r  r  r  r  s    rv   r  zReinterpretView.__str__<
  s'    	
 
 	
rw   c                4    | j                                         S r   r  r  s    rv   r8  zReinterpretView.get_nameF
  r  rw   c                    | j         j        S r   r  r   r  s    rv   r   zReinterpretView.get_deviceI
      {!!rw   c                    d S r   rx   r  s    rv   rU  zReinterpretView.get_origin_nodeL
  r  rw   c                    | j         j        S r   )r  r   r  s    rv   r   zReinterpretView.dtypeO
  s    {  rw   c                4    t          | j        j                  S r   rc   r  r   r  s    rv   r   zReinterpretView.get_sizeS
      DK$%%%rw   c                4    t          | j        j                  S r   rc   r  r   r  s    rv   r;  zReinterpretView.get_strideV
      DK&'''rw   c                      fd}|S )Nc                   j                                         }t          j                                         ||                     }j         j        j        j        k    r%t          j        |j        j        j                  S |S r   )r  r?  rN   loadr8  r   r  to_dtype_bitcast)r   r  
tmp_loaderr  s      rv   r  z+ReinterpretView.make_loader.<locals>.loaderZ
  sl    k..00G$--//775>>BBJ{ DIO33+J
DIOTTT!!rw   rx   r  r  s   ` rv   r>  zReinterpretView.make_loaderY
  s#    	" 	" 	" 	" 	" rw   c                4    | j                                         S r   r  r?  r  s    rv   r?  zReinterpretView.make_indexerd
      {'')))rw   c                    | j         S r   r  r  s    rv   r   zReinterpretView.get_layoutg
  r  rw   c                    d S r   rx   r  s    rv   r  zReinterpretView.freeze_layoutj
      rw   rn  c                    t          | j        j                  t          | j        j                  z  t          | j        j                  z  S r   )r(   r  r   r   r  r  s    rv   rB  z(ReinterpretView.get_unbacked_symbol_usesm
  sA    !$+"233#DK$6778#DK$6778	
rw   Nc                    t           j        j                            | j        | j        j        | j        j        | j        j        || j        j	                  S r  )
rP   r   wrapper_codecodegen_reinterpret_viewr  r  r   r   r  r   r4  s     rv   r6  z!ReinterpretView.codegen_referencet
  sN     w#<<IKKK+# = 
 
 	
rw   c                    dS r  rx   r  s    rv   r:  zReinterpretView.num_reads
      qrw   ru  rw  r   )r  rE  rF  __doc__rG  r  r  r  r8  r   rU  rK  r   r   r;  r>  r?  r   r  rB  r6  r:  r  r  s   @rv   r  r  1
  s=        44NNN0 0 0 0 0

 
 
 
 H$ $ $" " "   ! ! X!& & &( ( (	 	 	* * *    
 
 
 

 
 
 
      rw   r  c                  b    e Zd ZU dZded<   ed             ZddZeZe	d             Z
d	 Zd
 ZdS )	DtypeViewz(Pretend our storage has a different typer7  target_dtypec                    t          |          rIt          |          \  }}t          |j        ||j        |j        |j                  }t          ||          S t          ||          S r   )	r   r  r  r   r   r   r  r  r  )r  r   	new_dtyper  r  r  s         rv   r  zDtypeView.create
  so     ## 		8"7":":GZ$!!! J #7J777I&&&rw   r]   r{   c                D    |                      | j        | j        g          S r   )r  r  r  r  s    rv   r  zDtypeView.__str__
  s    	4+<=>>>rw   c                    | j         S r   )r  r  s    rv   r   zDtypeView.dtype
  s      rw   c                4    | j                                         S r   r  r   r  s    rv   r   zDtypeView.get_size
  r  rw   c                H      j                                          fd}|S )Nc                `    t          j         |           j        j        j                  S r   )rN   rs  r  r  r   )r   r  r  s    rv   r  z%DtypeView.make_loader.<locals>.loader
  s'    'c

D4EtyWWWrw   )r  r>  )r  r  r  s   ` @rv   r>  zDtypeView.make_loader
  sB    	%%''	X 	X 	X 	X 	X 	X rw   Nru  )r  rE  rF  r  rG  r  r  r  r  rK  r   r   r>  rx   rw   rv   r  r  
  s         22' ' ['? ? ? ? H! ! X!$ $ $    rw   r  c                  <    e Zd Zed             Zedd            ZdS )	SliceViewc                     t           j        j        |                                |         t	          d ||fD                       rd nfd fd} ||dd          } |||          }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  4   K   | ]}t          |          V  d S r   r  r  s     rv   rh  z0SliceView.normalize_start_end.<locals>.<genexpr>
  s+      HHA$Q''HHHHHHrw   c                R    t          j        t          j        | |          |          S r   )rm   MinMax)r   loweruppers      rv   clampz,SliceView.normalize_start_end.<locals>.clamp
  s     y1e!4!4e<<<rw   c                X                                             | |          |          S r   )evaluate_minevaluate_max)r   r  r  r   s      rv   r  z,SliceView.normalize_start_end.<locals>.clamp
  s)    ,,X-B-B1e-L-LeTTTrw   c                R    | |S                      |           }  | ||          S r   )r;  )r  r  r  r  r  r  dim_sizes       rv   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrap
  s6    {++C::C5eU+++rw   r   )rP   r   r   r   r  )	r  r   r2  startendr  r  r  r   s	   `     @@@rv   normalize_start_endzSliceView.normalize_start_end
  s     7#::<<$HH%h1GHHHHH 	U= = = =
U U U U U	, 	, 	, 	, 	, 	, 	, 
5!Xq11jeXx88czrw   r2   Tc           	        t          j                  t          t           j                  sdk    sJ 	 dk    r|dk    rdk    r|S n# t          $ r Y nw xY wt
          j        j        }t          |	                                          |r| 
                    ||          \  }t          |z
  dz
  z             <   t          |          rwt          |          \  }}	t          |	j                  }
|
         z  |
<   t          |	j        |	j        |
|	j        |	j                 z  z             }t'          ||          S fd}t)          ||          S )Nr   l    r2   c                    t          |           t                    k    sJ d|  d             t          |           } |          z  z   | <   | S )Nzwrong ndim r"  )r   rc   )r   r2  r  r  steps    rv   r   z!SliceView.create.<locals>.reindex
  s`    u::X...0Pe0P0Ph0P0P...KKEsd*U2E#JLrw   )r   r   )rm   r  rb   r   	TypeErrorrP   r   r   rc   r   r  r/   r   r  r   r  r   r   r  r  r  )r  r   r2  r  r  r  r  r   r  r  r  r  r   r  s     `` `       @rv   r  zSliceView.create
  s   |D!!$
++7taxxxx	zzcY..4199 	 	 	D	 7#

%%
  	E00CDDJE3 uq!94@@ ## 	8"7":":GZj/00J(o4JsO$! !J$5c$:U$BB J #7J777	 	 	 	 	 	 	 	 7;;;;s   A 
AAN)r2   T)r  rE  rF  r  r  r  rx   rw   rv   r  r  
  sM          [8 )< )< )< [)< )< )<rw   r  c                  N    e Zd ZU ded<   ded<   d Zd Zd Zd Zd	 Zd
 Z	d Z
dS )BaseConstantr7  r   rz  r   c                    dS Nrx   rx   r  s    rv   r   zBaseConstant.get_size
      rrw   c                    | j         S r   r  r  s    rv   r   zBaseConstant.get_device   r  rw   c                    d S r   rx   r  s    rv   rU  zBaseConstant.get_origin_node  r  rw   c                    d S r   rx   r  s     rv   r@  zBaseConstant.mark_reuse  r|  rw   c                    dS r[  rx   r  s    rv   r=  z#BaseConstant.has_exceeded_max_reads	  r]  rw   c                    dS r  rx   r  s    rv   r9  zBaseConstant.get_reads  r  rw   c                    dS r[  rx   r  s    rv   r\  zBaseConstant.is_extern  r]  rw   N)r  rE  rF  rG  r   r   rU  r@  r=  r9  r\  rx   rw   rv   r  r  
  s                         rw   r  c                  @    e Zd ZU ded<   ded<   ded<   d Zd Zd	 Zd
S )Constantr   r   r7  r   rz  r   c                      fd}|S )Nc                B    t          j        j        j                  S r   )rN   r  r   r   r   r  s    rv   r  z$Constant.make_loader.<locals>.loader  s    <
DJ777rw   rx   ru  s   ` rv   r>  zConstant.make_loader  s#    	8 	8 	8 	8 	8 rw   c                    d S r   rx   r  s    rv   r2  zConstant.realize  r|  rw   c                8    t          | j        | j        |          S r   )r  r   r   r  s     rv   r  zConstant.constant_to_device"  s    
DJ777rw   N)r  rE  rF  rG  r>  r2  r  rx   rw   rv   r  r    sd         JJJ    8 8 8 8 8rw   r  c                  :    e Zd ZU ded<   ded<   ded<   d Zd Zd	S )
IndexingConstantr   r   r7  r   rz  r   c                      fd}|S )Nc                B    t          j        j        j                  S r   )rN   r  r   r   r  s    rv   r  z,IndexingConstant.make_loader.<locals>.loader-  s    >$*dj999rw   rx   ru  s   ` rv   r>  zIndexingConstant.make_loader,  s#    	: 	: 	: 	: 	: rw   c                8    t          | j        | j        |          S r   )r  r   r   r  s     rv   r  z#IndexingConstant.constant_to_device2  s    
DJ???rw   N)r  rE  rF  rG  r>  r  rx   rw   rv   r  r  &  sZ         JJJ  @ @ @ @ @rw   r  r   r'  c           	         t          d t          | t                              |          |          D                       S )Nc              3  6   K   | ]\  }}}|d k    p||k    V  dS r  rx   )r   leftrightr   s       rv   rh  z2is_contiguous_strides_for_shape.<locals>.<genexpr>9  sI        D% 		"TU]     rw   )r  r   r   r   )r   r'  s     rv   is_contiguous_strides_for_shaper  6  sQ       !$N55e<<e"
 "
     rw   rq   c                *    t           j        | j        z  S r   )r3   padding_alignment_bytesitemsizer!  s    rv   get_align_for_dtyper  A  s    )U^;;rw   c                      e Zd Z ed          fd dZed             Zd!dZeZd Z	e
d             Zd Zd Zd Ze
d             Zd Zd Zd Zd Zd"dZd#dZdS )$ra  r   r   rz  r   r7  r   r}  r   $Optional[Sequence[Union[Expr, int]]]r  r   c                    |0t          |          t          |          k    sJ d| d|             || _        || _        t          d |D                       sJ || _        || _        || _        d S )Nr4  	, stride=c              3  N   K   | ] }t          |t          t          f          V  !d S r   )rb   r   rq   r  s     rv   rh  z"Layout.__init__.<locals>.<genexpr>T  s0      <<!:a$--<<<<<<rw   )r   r   r   r  r   _strider  )r  r   r   r   r   r  s         rv   r%  zLayout.__init__G  s     ~Tc/
 /
 "
 "
 "
*4**&**"
 "
 "
 
<<t<<<<<<<<	rw   c                    | j         S r   )r  r  s    rv   r   zLayout.strideY  s
    |rw   r]   r{   c                    d}| j         dk    r
d| j          }t          |           j         d| j        j         d| j         d| j         d| j         | dS )	Nr  r   z	, offset=z('z', z, size=r  r  )r  rs   r  r   r   r   r   )r  r  s     rv   r  zLayout.__str__]  s    ;!...FDzz" ? ?dk&6 ? ?4: ? ?I? ?(,?5;? ? ?	
rw   c                6    t          | j        | j                  S r   )r  r   r   r  s    rv   r  zLayout.is_contiguoush  s    .t{DIFFFrw   c                    t          |           }|dvs| d         dk    rdS t          |t          |           |           D ]\  }}}|dk    r	||k    r dS dS )N)r      r2   FT)r   r   r"   )r'  r  ndimr  r  r   s         rv   is_channels_last_contiguousz"Layout.is_channels_last_contiguousk  sy    5zzvqQ5!$3E::E"
 "
 	 	D% qyyTU]]uutrw   c                    t          | j        t          t                              t          t          | j                                                | j                  D ]\  }}}|dk    r	||k    r dS dS )Nr2   FT)r   r   reversedr   r   rc   r   )r  r  r  r   s       rv   is_transposedzLayout.is_transposedw  sw    !$K^66tHTY<O<O7P7PQQRRI"
 "
 	 	D%
 qyyTU]]uutrw   c                &    t           j                  t                    k    sJ d t           j                  D             } fd|D             }fd|D             d } |          dgt                    z  }t	          t                              D ]5}t
          j        j                            ||                   ||         <   6t	          t                    dz
            D ]}||         ||dz            k    r dS dS )	Nc                l    g | ]1\  }}t           j        j                            |d           dk    /|2S r-  r0  r1  s      rv   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  sI     
 
 
3w))#)::a?? ???rw   c                *    g | ]}j         |         S rx   r	  )r   r   r  s     rv   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  s    888Q$+a.888rw   c                     g | ]
}|         S rx   rx   )r   r   r   s     rv   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  s    111aq111rw   c                >    t          |           fd| D             S )Nc                :    g | ]}                     |          S rx   )r   )r   element
sorted_arrs     rv   r   zDLayout.is_stride_ordered.<locals>.sorted_indices.<locals>.<listcomp>  s'    AAA'J$$W--AAArw   )r  )arrr  s    @rv   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices  s(    JAAAASAAAArw   r  r2   FT)	r   r   r   r   r   rP   r   r   r   )r  r   r6  r   r  stride_orderedr   s   ``     rv   r  zLayout.is_stride_ordered  sJ   4;3u::----
 
#DI..
 
 
 9888-8881111=111	B 	B 	B
 u%% E

*s5zz"" 	M 	MA'(w'7'A'A&)'L'LN58$$s5zzA~&& 	 	Aa >!a%#888uu 9trw   c                    dgt          t          t          dt          | j                  dz
                                z   }t          |          g|z   }|                     |          S r  )rc   r  r   r   r   r  r  r   s     rv   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  s]    d8E!S-=-=-A$B$BCCDDDUu$%%e,,,rw   c                ,   t          |          }t          |           dk    r| S t          j        st                              ||           r| S t          j                    }t          |d          r|j	        
                    dd          r| S t          d t          j        | |          D                       s| S t          |           }t          |          }d t!          t          |                     D             }d||d         <   d}t#          |dd         d	          D ]T\  }	}
||	dz
           }||         ||         z  }|t          j        k    r||z  dk    rt'          ||          |z  }d
}|||
<   U|s| S t(          xj        dz  c_        |S )z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   metadislike_paddingFc              3  X   K   | ]%}t          |t          t          j        f          V  &d S r   rT  r  s     rv   rh  z&Layout._pad_strides.<locals>.<genexpr>  sG       
 
 q3.//
 
 
 
 
 
rw   c                    g | ]}d S r   rx   r   s     rv   r   z'Layout._pad_strides.<locals>.<listcomp>  s    999Qq999rw   r2   N)r  T)r  r   r3   pad_channels_lastra  r  rP   get_current_noderT  r  getr  r  chainr   r   r   r   padding_stride_thresholdrB   r   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder  r   new_stridespaddedrankr   prev_idxr   s                rv   _pad_strideszLayout._pad_strides  s    $E**z??a' 	F,N,N*-
 -
 	 ,..?F++ 	0D0H0Hu1
 1
 	   
 
_Z66
 
 
 
 
 	 '
33,\::
99%J"8"8999 &'JqM"":abb>;;; 	& 	&ID#!$(+H *T(^;F777FUNa<O<O //%7%K 	 ))Q.))rw   c                    t          | t                    sJ | j        J |                     | j        | j        | j                  | _        d S r   )rb   r   r  r  r   r   r  s    rv   r  zLayout.pad_strides  sI    $/////|'''((ty$*MMrw   c                D    t           j        ot          | t                    S r   )r3   comprehensive_paddingrb   r   r  s    rv   r  zLayout.should_pad_strides  s    +P
40P0PPrw   c                    t          | t                    r| S |                                 r|                                  t          | j        | j        | j        | j        | j                  S r   )	rb   r  r  r  r   r   r   r   r  r  s    rv   as_fixedzLayout.as_fixed  sj    dK(( 	K""$$ 	KJIKK
 
 	
rw   c                    t           j        s J dt          |           j         d            |                                                                 S )Nzconvert z to FixedLayout first)r   r  rs   r  r  r?  r  s    rv   r?  zLayout.make_indexer  sY    )	A 	A@d4jj)@@@	A 	A 	A }}++---rw   r   c                    | j         |j         k    o?| j        |j        k    o/| j        |j        k    o| j        |j        k    o| j        |j        k    S r   r   r   r   r   r  )r  others     rv   __eq__zLayout.__eq__  s[    K5<' ,
ek),	UZ', u|+, u|+	
rw   
sympy.Exprc                B    t          | j        | j        | j                  S r   )r   r   r   r  r  s    rv   storage_sizezLayout.storage_size  s    .ty$+t{SSSrw   N)
r   rz  r   r7  r   r}  r   r  r  r   ru  )r]   r   r]   r  )r  rE  rF  r   r%  rK  r   r  r  r  rH  r  r  r  r  r  r  r  r  r?  r   r  rx   rw   rv   ra  ra  E  sV        wqzz    $   X
 
 
 
 HG G G 	 	 \	    <- - - 8 8 \8tN N N
Q Q Q
 
 
. . .
 
 
 
T T T T T Trw   ra  c                  B     e Zd ZdZd ed          fd fdZd Z xZS )r  z A Tensor layout we cannot changeNr   r   rz  r   r7  r   Union[List[Expr], List[int]]r   r  r  Union[Expr, int]c                    |t                               |          }t                                          |||||           d S r   )r   r   r  r%  )r  r   r   r   r   r  r  s         rv   r%  zFixedLayout.__init__  sR     >#66t<<F	
 	
 	
 	
 	
rw   c                      fd}|S )z1A closure containing math to read a given elementc                   t          |           t          j                  k    sJ t          |           t          j                  k    sJ j        }t	          | j        j                  D ]\  }}}|dk    r|||z  z   }|S r  )r   r   r   r  r   )r   rW  r   r   szr  s        rv   r  z)FixedLayout.make_indexer.<locals>.indexer!  s    u::T[!1!11111u::TY////[F#&udk49#E#E 3 3VR77#cFl2FMrw   rx   r  r  s   ` rv   r?  zFixedLayout.make_indexer  s#    	 	 	 	 	 rw   )
r   rz  r   r7  r   r  r   r  r  r  )r  rE  rF  r  r   r%  r?  r  r  s   @rv   r  r  	  sg        ** 8<#*71::
 
 
 
 
 
 
$      rw   r  c                       e Zd ZdZdZed             Zed             Zed             Zed             Z	ed             Z
ddZdd	Zd
 Zd Zd fd	Z xZS )r   z(A Tensor layout we are allowed to changeFc                    t          |           dk    rg S t          j        d          g}t          | dd                    D ] }|                    ||d         z             !t          t          |                    S )Nr   r2   r  )r   rm   r   r  r  rc   )sizesreversed_stridesr   s      rv   r   z!FlexibleLayout.contiguous_strides3  s    u::??I!M!,,-U122Y'' 	A 	AD##D+;B+?$?@@@@H-..///rw   c                
   t          t          t          |                               t          |          k    sJ | |f            t          j        d          }dgt          |          z  }|D ]}|||<   || |         z  }|S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        r2   N)r-   r   r   rm   r   )r  r   next_strider  r   s        rv   fill_orderedzFlexibleLayout.fill_ordered<  s     %E

++,,
50A0AAAAE5>AAAmA&&&3u::% 	1 	1A$GAJ%a0KKrw   c                    t          t          t          |                               t          |          k    sJ t          |          }t                              | |          S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r-   r   r   r   r   r  )r  r   r   s      rv   r  zFlexibleLayout.stride_orderedM  sV     %E

++,,
50A0AAAAA,U33
**5*===rw   c                Z   |t           j        k    r t                              | t                    S |t           j        k    r t                              | t                    S |t           j        k    rt                              |           S t          
                    d|           t          )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)rg   channels_lastr   r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   r  r  r  )r  memory_formats     rv    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_formatY  s     E///!008IJJJe444!008JKKKe555!44U;;;IIP   &%rw   c                    t          |           t          |          k    sJ d |D             }t          t          t          |                    |j                  }t                              | |          S )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        c                V    g | ]&}t           j        j                            |          'S rx   r0  r  s     rv   r   z/FlexibleLayout.same_ordered.<locals>.<listcomp>{  s+    @@@A!'",,Q//@@@rw   rb  )r   r  r   __getitem__r   r  )r  r   r   s      rv   same_orderedzFlexibleLayout.same_orderedr  sm     5zzS[[((((@@@@@E#f++..F4FGGG
**5*===rw   c                    |                      | j        |          }|                                 r#|r!|                     || j        | j                  }t          | j        | j        | j        || j                  S r   )r  r   r  r  r   r  r   r  )r  r   r  r  s       rv   as_stride_orderzFlexibleLayout.as_stride_order  sz    ((E::
""$$ 	N 	N**:ty$*MMJKJIK
 
 	
rw   c                    |}|                                  r#|r!|                     || j        | j                  }t	          | j        | j        | j        || j                  S r   )r  r  r   r   r  r   r  )r  r  r  r  s       rv   as_exact_strideszFlexibleLayout.as_exact_strides  sh    "
""$$ 	N 	N**:ty$*MMJKJIK
 
 	
rw   c                    |                      | j        |          }|                                 r!|                     || j        | j                  }t          | j        | j        | j        || j                  S r   )r  r   r  r  r   r  r   r  )r  r   r  s      rv   as_fill_orderzFlexibleLayout.as_fill_order  sr    &&ty%88
""$$ 	N**:ty$*MMJKJIK
 
 	
rw   c                    |                      | j        |          }|                                 r!|                     || j        | j                  }t          | j        | j        | j        || j                  S r   )r   r   r  r  r   r  r   r  )r  r   r  s      rv   as_same_orderzFlexibleLayout.as_same_order  sr    &&ty&99
""$$ 	N**:ty$*MMJKJIK
 
 	
rw   Nc                    |rt                               ||          }nt                               |          }t                                          ||||           d S r   )r   r  r   r  r%  )r  r   r   r   r  r  r  s         rv   r%  zFlexibleLayout.__init__  sY     	>$11$EEGG$77==Gg66666rw   Fr   )r  rE  rF  r  r  rH  r   r  r  r  r   r"  r$  r&  r(  r%  r  r  s   @rv   r   r   -  s       22N 0 0 \0   \  	> 	> \	> & & \&0 
> 
> \
>
 
 
 

 
 
 


 

 



 

 

7 7 7 7 7 7 7 7 7 7rw   r   c                  0     e Zd ZdZd fdZd Zd Z xZS )NonOwningLayoutz,Is a view into the storage of another tensorviewUnion[BaseView, TensorBox]c                    |                                 }t                                          |j        |j        |j        |j                   || _        d S r   )r   r  r%  r   r   r   r   r-  )r  r-  r  r  s      rv   r%  zNonOwningLayout.__init__  sO    ""MLKM		
 	
 	
 			rw   c                N    |                                                                  S r   )r  r?  r  s    rv   r?  zNonOwningLayout.make_indexer  s    }}++---rw   c                    | j                                         j        }|dk    rdS ddlm} t
          j        j                            ||          S )Nr   Tr2   )	ALIGNMENT)	r-  r   r  utilsr2  rP   r   r   statically_known_multiple_of)r  r  r2  s      rv   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sS    %%''.Q;;4$$$$$$w<<VYOOOrw   )r-  r.  )r  rE  rF  r  r%  r?  r5  r  r  s   @rv   r,  r,    sh        66     . . .P P P P P P Prw   r,  c                       e Zd Zd Zd Zd ZdS )
NoneLayoutc                4    || _         dg| _        dg| _        d S r+  )r   r   r   r  s     rv   r%  zNoneLayout.__init__  s    C	crw   c                    dS r+  rx   r  s    rv   r  zNoneLayout.storage_size  r  rw   c                    | S r   rx   r  s    rv   r  zNoneLayout.as_fixed      rw   N)r  rE  rF  r%  r  r  rx   rw   rv   r7  r7    sA          
      rw   r7  c                       e Zd Zd fdZej        j        d             ZddZdd	Zd
 Z	e
dd            Zd Zd Z xZS )MutationLayoutSHOULDREMOVEr  rY   c                T   t                                          |                                |                                |                                d            || _        |                                                                 }t          j	        
                    |           d S r   )r  r%  r   r   r   r  
get_bufferr8  rP   r   mark_buffer_mutated)r  r  rz   r  s      rv   r%  z#MutationLayoutSHOULDREMOVE.__init__  s    OO		
 	
 	
   ))++	##D)))))rw   c                4    |                                  j        S r   )real_layoutr   r  s    rv   r   z!MutationLayoutSHOULDREMOVE.stride  s    !!((rw   r]   r  c                N    |                                                                  S r   )rB  r  r  s    rv   r  z'MutationLayoutSHOULDREMOVE.storage_size  s     !!..000rw   r  c                p    fd | j                   }t          |t                    s
J d            |S )Nc                    t          | t                    r | j                  S t          | t                    r |                                           S t          | t
                    r | j                  S | S r   )rb   r=  r  r  r  
MutableBoxr  )r  unwrap_viewss    rv   rG  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  s    &"<== 3#|FM222&(++ :#|F$6$6$8$8999&*-- 1#|FK000Mrw   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r  rb   r  )r  rW  rG  s     @rv   r?  z%MutationLayoutSHOULDREMOVE.get_buffer  sd    	 	 	 	 	 dk**F
 
 	? 	?>	? 	? 	? rw   c                4    |                                  j        S r   )r?  r  r  s    rv   rB  z&MutationLayoutSHOULDREMOVE.real_layout
  s      ''rw   Fc                   |                                  t          j                            |                                           t          |t                    r|j        }|                                 |st          
                    |                                |                                |                                d t          |                                |                                          D                       j        }|                                  t          |j        j        t"                    sJ t%          |          |j        _        |j        S )Nc                ^    g | ]*\  }}t           j        j                            ||          +S rx   rP   r   r   rJ  r   r  r  s      rv   r   z;MutationLayoutSHOULDREMOVE.realize_into.<locals>.<listcomp>%  sA       1 G$11!Q77  rw   r  )r2  rP   r   r@  r8  rb   rX   r  rA  r  r  r   r   r>  r   r   r  r   r=  )r  srcdstunsafe_aliass       rv   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into  s"    	
##CLLNN333c9%% 	(C 	 		""~~''mmoo**  #CLLNNCLLNN C C  	 #     	#(/>:::::4S99xrw   c                    | S r   rx   r  s    rv   r  z#MutationLayoutSHOULDREMOVE.as_fixed0  r;  rw   c                4    | j                                         S r   )r  r?  r  s    rv   r?  z'MutationLayoutSHOULDREMOVE.make_indexer3  rx  rw   )r  rY   r  )r]   r  r*  )r  rE  rF  r%  ra  r   getterr  r?  rB  r  rP  r  r?  r  r  s   @rv   r=  r=    s        	* 	* 	* 	* 	* 	* ]) ) )1 1 1 1    ( ( (       [ D  * * * * * * *rw   r=  c                       e Zd ZU ded<   ded<    fdZd Zd)d	Zd
 Zd Zd*dZ	e
d             Zd Zd Zd Zd Zd Zd Zd Zd+dZd Zd Zd+dZd Zd Zd,dZd Zd  Zd! Zd-d#Zd.d%Zd.d&Zd' Z d( Z! xZ"S )/r  r   rz   ra  r  c                V    t                                                       d | _        d S r   r  r  s    rv   r  zBuffer.__post_init__A  r  rw   c                4    | j                                         S r   rw  r  s    rv   r?  zBuffer.make_indexerE  rx  rw   r]   r{   c                2    | j         s
J |             | j         S r   rf  r  s    rv   r8  zBuffer.get_nameH  s     y$yrw   c                    | j         j        S r   rf  r  s    rv   r   zBuffer.get_deviceL  rg  rw   c                    | j         S r   r  r  s    rv   rU  zBuffer.get_origin_nodeO  r  rw   Optional[Operation]c                    d S r   rx   r  s    rv   r  zBuffer.get_defining_opR  r  rw   c                .    t          | j        dd           S )Nr   )r   r  r  s    rv   r   zBuffer.dtypeU  s    t{GT222rw   c                4    t          | j        j                  S r   rk  r  s    rv   r   zBuffer.get_sizeY  rl  rw   c                4    t          | j        j                  S r   rn  r  s    rv   r;  zBuffer.get_stride\  ro  rw   c                    | j         j        S r   )r  r  r  s    rv   
get_offsetzBuffer.get_offset_  rg  rw   c                    | j         S r   rz  r  s    rv   r   zBuffer.get_layoutb  r  rw   c                *    |                                  S r   )r)  r  s    rv   r<  zBuffer.get_storage_numele  s    ~~rw   c                    dS r[  rx   r  s    rv   r\  zBuffer.is_externh  r]  rw   c                    t          | j        t          t          f          s | j                                        | _        d S d S r   )rb   r  MultiOutputLayoutr,  r  r  s    rv   r  zBuffer.freeze_layoutk  s?    $+(9?'KLL 	1+..00DKKK	1 	1rw   Fc                    t          | j        t                    sJ | j                            ||          | _        d S Nr  )rb   r  r   r"  )r  r   r  s      rv   r  z&Buffer.freeze_layout_with_stride_ordero  s:    $+~66666k11%}1UUrw   c                |    t          | j        t                    sJ | j                            |          | _        d S r   )rb   r  r   r&  r  s     rv   freeze_layout_with_fill_orderz$Buffer.freeze_layout_with_fill_orders  s5    $+~66666k//66rw   c                |    t          | j        t                    sJ | j                            |          | _        d S r   )rb   r  r   r(  )r  r   s     rv   freeze_layout_with_same_orderz$Buffer.freeze_layout_with_same_orderw  s5    $+~66666k//77rw   c                    t          | j        t                    sJ | j                            ||          | _        d S rg  )rb   r  r   r$  )r  r  r  s      rv   r  z'Buffer.freeze_layout_with_exact_strides{  sA    $+~66666k22 3 
 
rw   c                    t           j        j                            t	          j        |                                 d                    S r+  r,  r  s    rv   r/  zBuffer.is_zero_elements  r0  rw   c                                                       r(t          t                                                     S  fd}|S )Nr!  c                |    j                                         }t          j        j         ||                     S r   )r  r?  rN   rr  rz   r   r  r  s     rv   r  z"Buffer.make_loader.<locals>.loader  s2    k..00G8DIwwu~~666rw   )r/  r   r  r   ru  s   ` rv   r>  zBuffer.make_loader  sU      "" 	B=0@0@AAAA	7 	7 	7 	7 	7 rw   Nc                *    |                                  S r   r8  r4  s     rv   r6  zBuffer.codegen_reference  r  rw   c                    d S r   rx   r  s    rv   r  zBuffer.decide_layout  r|  rw   c                x    t          | j        t                    r| j        j                                        gS dS r  )rb   r  r,  r-  r8  r  s    rv   get_inputs_that_alias_outputz#Buffer.get_inputs_that_alias_output  s5    dk?33 	1K$--//00rrw   c                x    t          | j        t                    r| j        j                                        gS dS r  )rb   r  r=  r  r8  r  s    rv   get_mutation_nameszBuffer.get_mutation_names  s6    dk#=>> 	3K&//1122rrw   r  c                F    t          |                                 g          S r   )r-   r8  r  s    rv   r  zBuffer.get_read_names  s    4==??+,,,rw   rn  c                    t                      S r   r,   r  s    rv   rB  zBuffer.get_unbacked_symbol_uses  rq  rw   c                    t                      S r   r,   r  s    rv   rp  zBuffer.get_unbacked_symbol_defs  rq  rw   c                    d S r   rx   r  s    rv   r2  zBuffer.realize  r|  rw   c                    dS r[  rx   r  s    rv   should_allocatezBuffer.should_allocate  s    urw   ru  )r]   rZ  r*  r   rC  rw  )#r  rE  rF  rG  r  r?  r8  r   rU  r  rK  r   r   r;  r`  r   r<  r\  r  r  ri  rk  r  r/  r>  r6  r  ru  rw  r  rB  rp  r2  r}  r  r  s   @rv   r  r  7  s0         NNN
         * * *   " " "         3 3 X3& & &( ( (" " "         1 1 1V V V V7 7 78 8 8
 
 
 
W W W	 	 	       
  
- - - -              rw   r  c                  $    e Zd ZddZd	dZd ZdS )
OperationBufferr]   rk  c                    | gS r   rx   r  s    rv   rm  zOperationBuffer.get_outputs  s	    vrw   rM  c                    | S r   rx   r  s    rv   r  zOperationBuffer.get_defining_op  r;  rw   c                n    t                               |            t                              |            d S r   )r  r  rM  r  s    rv   r  zOperationBuffer.__post_init__  s0    T"""%%%%%rw   Nrv  r]   rM  )r  rE  rF  rm  r  r  rx   rw   rv   r  r    sK              & & & & &rw   r  c                      e Zd Zd ZdS )InputBufferc                    dS r  rx   r  s    rv   r:  zInputBuffer.num_reads  r  rw   N)r  rE  rF  r:  rx   rw   rv   r  r    s#            rw   r  c                  *    e Zd ZU dZded<   d Zd ZdS )r  NzOptional[torch.device]r  c                      fd}|S )Nc                    j                                         }t          j        t          j                                                            j                   ||                     S r   )	r  r?  rN   rr  rP   r   constant_namer8  r  rp  s     rv   r  z*ConstantBuffer.make_loader.<locals>.loader  sU    k..00G8%%dmmoot7KLL  rw   rx   ru  s   ` rv   r>  zConstantBuffer.make_loader  s#    	 	 	 	 	 rw   c                    t          t          j                            |                                 |          | j                  S r   )r  rP   r   r  r8  r  r  s     rv   r  z!ConstantBuffer.constant_to_device  s5    G!!$--//6::DK
 
 	
rw   )r  rE  rF  r  rG  r>  r  rx   rw   rv   r  r    sE         .2O2222  
 
 
 
 
rw   r  c                      e Zd ZddZddZdS )NoneAsConstantBufferr]   rn  c                    t                      S r   r,   r  s    rv   rB  z-NoneAsConstantBuffer.get_unbacked_symbol_uses  rq  rw   Nc                .    t           j        j        j        S r   )rP   r   r  none_strr4  s     rv   r6  z&NoneAsConstantBuffer.codegen_reference  s    w#,,rw   rw  r   )r  rE  rF  rB  r6  rx   rw   rv   r  r    s<           - - - - - -rw   r  c                  D     e Zd Z fdZed             ZddZd	dZ xZS )
ShapeAsConstantBufferc                V    t                                                       || _        d S r   )r  r%  _shape)r  r'  r  s     rv   r%  zShapeAsConstantBuffer.__init__  s$    rw   c                    | j         S r   )r  r  s    rv   r'  zShapeAsConstantBuffer.shape  s
    {rw   r]   rn  c                *    t          | j                  S r   )r(   r'  r  s    rv   rB  z.ShapeAsConstantBuffer.get_unbacked_symbol_uses  s    $TZ000rw   Nc                    t           j        j                            t           j        j                            | j                            S r   )rP   r   r  expr_printerr   r  r'  r4  s     rv   r6  z'ShapeAsConstantBuffer.codegen_reference  s0    w#001A1J1J4:1V1VWWWrw   rw  r   )	r  rE  rF  r%  rK  r'  rB  r6  r  r  s   @rv   r  r    s               X1 1 1 1X X X X X X X Xrw   r  c                       e Zd ZU ded<   d Zd ZddZd Zdd
Z fdZ	d Z
d Zd Zed             Z	 	 dddZe	 d d            Zd Zd Zd Zd Zd Z xZS )!r  ry  r  c                d    | j         | j         S t          | j        d          r| j        j         S dS )z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nrz   )rz   rT  r  r  s    rv   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name  s7    
 9 949f%% 	"9>!trw   c                4    | j                                         S r   r  r:  r  s    rv   r:  zComputedBuffer.num_reads  r  rw   r]   r  c                4    | j                                         S r   r  r  s    rv   r  zComputedBuffer.get_read_names   r  rw   c                   t          j        t          dd          5  | j                                        r]t          |                                 | j                                        | j                                                  cd d d            S t          |                                 | j        	                                          cd d d            S # 1 swxY w Y   d S r  )
r   r   r   r  r  r9   get_store_functionr  r  r   r  s    rv   ra  zComputedBuffer.get_read_writes  s&   \.*:DAA 	 	y++-- 
*++--I0022I0022 	 	 	 	 	 	 	 	 +++--I&&(( 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A*C8CCCrn  c                    t          |                                           t          |                                           z  t          |                                           z  | j                                        z  S r   )r(   r   r;  r`  r  rB  r  s    rv   rB  z'ComputedBuffer.get_unbacked_symbol_uses  sh    & "$--//22#DOO$5$5667#DOO$5$5667 i00223	
rw   c                    t          | j        d          rI| j        t          j        j        vr1|                                 dk    r| j                                        S t                                                      S )Nr>  r   )	rT  r  rz   rP   r   mutated_buffersr:  r>  r  r  s    rv   r>  zComputedBuffer.make_loader*  sl     DI}--	+	!888  A%% 9((***ww""$$$rw   c                ^   | j                                                                         }t          | j        t
          t          t          f          r t          | j        j	        | j
        |          S t          | j        t                    sJ t          | j        j        | j
        |          S r   )r  r  r?  rb   r  r9  rC  r  r   rF  rz   r  r  r  s     rv   r  z!ComputedBuffer.get_store_function5  s    +&&((5577di)T4!899 	G494diIIIdi3333349149gFFFrw   c                :   t          | j        t                    rt          j        | j                                        | j                                                  \  \  }}|                                 j	        }t          d |D                       sJ fd|D             }|rqt          | j        t          t          f          r| j                            |          n|fd|D             }ddlm}  |||                                           S dS )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  b   K   | ]*}t          |t          j        t          j        f          V  +d S r   )rb   r4   StarDep	MemoryDepr  s     rv   rh  z0ComputedBuffer.get_fill_order.<locals>.<genexpr>L  sK         1|3\5KLMM     rw   c                    g | ];}t          |t          j                  t          |j        d  D                       <S )c                D    i | ]}|d k    |t          j        d           S r   r  )r   vs     rv   r   z<ComputedBuffer.get_fill_order.<locals>.<listcomp>.<dictcomp>R  s*    TTTaQRSVVaq!1!1VVVrw   )rb   r4   r  rM   r   )r   r  rG  s     rv   r   z1ComputedBuffer.get_fill_order.<locals>.<listcomp>P  s^        a!788GTT>TTT   rw   c                Z    g | ]'}t           j        j                            |          (S rx   rP   r   r   r  )r   exprr  s     rv   r   z1ComputedBuffer.get_fill_order.<locals>.<listcomp>]  s;     " " "EIAG$11$@@" " "rw   r2   pick_loop_orderN)rb   r  r   r4   r  r  r  r  ra  rj  r  rC  r  r   	schedulerr  r   )r  
index_varsr   rj  stride_lengthsr  r  rG  s         @@rv   get_fill_orderzComputedBuffer.get_fill_order=  sn    dk>22 	H.:.M	,,..	0L0L0N0N/ /+(Z! ((**0E              	  E  
Hdi$66 )"i//
NKKGG(G" " " "MR" " " 766666&~t}}GGGtrw   c                    t          | j        t                    rC|                                 }|r|                     |           d S |                                  d S d S r   )rb   r  r   r  ri  r  r  s     rv   r  zComputedBuffer.decide_layoutf  sk    dk>22 	%''))E %22599999""$$$$$	% 	%rw   c                   t          j        | j                                        | j                                        d          \  }}t          j        t          d|                                           5  t          | 
                                |                                 r|n	|d d         |g|R  }d d d            n# 1 swxY w Y   g }g }g }g }|                                D ]t\  }}	||d         v r/|rJ |                    |           |                    |	           >||d         v sJ |                    |           |                    |	           u||f|||ffS )NqrV   r  r2   r   )r4   r  r  r  r  r   r   r  r   r;   r  r  itemsr  )
r  r~   
var_rangesr  r  reduce_vars
index_sizereduce_sizer  r   s
             rv   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_bodyn  s   ':I((**DI,H,H,J,JSV
 
 
j \.*;T__=N=NOO 	 	''))0022@RaR 	  D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
!#
$$&& 	& 	&DAqDG||&&&&!!!$$$!!!$$$$DG||||""1%%%""1%%%%K($[0IIIs   6A CC	CNextra_indexing_constraints*Optional[Tuple[Dict[Any, Any], List[Any]]]recompute_sizes_body_funcOptional[Callable[..., Any]]c                                                      \  \  }}}\  }}|r |||f|||f          \  \  }}}\  }}g |j                                        |t          |t                    rt          |          dk    sJ |\  }}	t          |t                    sJ t          |	t                    sJ t          d |	D                       sJ |j	        }
|
|k    sJ |
|f            fd|	D             }	|	z  g |
                                t          j                             t          j                  s'                    |                                            fd}||z   }                                 j        dk    pt(          j         } |||||          \  }}} |||||          \  }}}t-          j        ||d          \  \  }}}t1          | ||           ||          g|||          }||f|fS )	an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        Nr   c              3  @   K   | ]}t          |t                    V  d S r   )rb   r   )r   fs     rv   rh  z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s,      HHqz!T**HHHHHHrw   c                    g | ]}|v|	S rx   rx   )r   r  index_formulass     rv   r   z7ComputedBuffer.simplify_and_reorder.<locals>.<listcomp>  s*     # # #!>2I2I2I2I2Irw   c           	                              | ||
          \  }}} ||           } |rJt          j        j                            | |t          	| |                    \  }}}t          ||          }n|}|||fS r   )_apply_loop_reorderingrP   r   r   _simplify_loopsr6   r   )x_varssupport_varsr  simplify_loopsreindex0r   r   pruner   r  memory_addrsr  s            rv   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s    (,(C(Ce\) )%E8X Xf%%F #)*)9)I)I,^VUKK* *&x
 *(H=="'8++rw   cudazrV   )r  indexing_exprsrf   rb   rd   r   re   rc   r  r  get_write_exprsrP   r   r  r5   PREFER_STORE_LOOP_ORDERextendget_read_exprsr   rs   r3   loop_ordering_after_fusionr4   index_vars_no_squeezer;   )r  r  r  r  r  r  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r  s   `                    @@rv   r  z#ComputedBuffer.simplify_and_reorder  s   4 ''))		
%Z%Z % 	
 *)[)4*k1J 	)[)[
 94.55778%15u==233q88889S6!#63T:::::1488888HH4GHHHHHHHH"&/&*????#%B???
# # # #.# # # 11N0--//0w""4)OPP 	7 3 3 5 5666	, 	, 	, 	, 	, 	, 	,$ "K/OO"f,UF4U0U 	 (<';	(
 (
$\1 ,@+?{4F,
 ,
(~q
 0</Q0
 0
 0
, K*
 \)$$nn[&A&AB
 
 ]+T11rw   c           
     n    ddl m} |g }	  fd|D             }t          |          t          |          k    r&t          |d                   t                     k    sJ t          t	           |||                              }n|# t
          $ ro t          j        r7t          	                    dt          t                               |           t          t          t                                        }Y nw xY wfd|D             t          |          t          |          fS )zU
        Shuffle the order of loops around to hopefully improve performance.
        r2   r  Nc                \    g | ](}t           j        j                            |          )S rx   r  )r   r  r  r  s     rv   r   z9ComputedBuffer._apply_loop_reordering.<locals>.<listcomp>  s@         --dJMM  rw   r   z%Did not simplify complex index:
%s
%sc                     g | ]
}|         S rx   rx   )r   r   r  s     rv   r   z9ComputedBuffer._apply_loop_reordering.<locals>.<listcomp>  s    )))aq)))rw   )r  r  r   rc   r  	Exceptionr3   r  r  warningre   r   r   r   r   )r  r  r  r  priority_idxr  r  r   s   ```     rv   r  z%ComputedBuffer._apply_loop_reordering  sx    	/.....L	,    (  G w<<3|#4#444WQZCM M : : : : //'5,"O"OPPQQEE 	, 	, 	,| =Z//00   
 s5zz**++EEE	, *)))5)))l5))?5+A+AAAs   A>B A6DDc                4    | j                                         S r   )r  r  r  s    rv   r  z!ComputedBuffer.get_reduction_size       y++---rw   c                4    | j                                         S r   )r  r  r  s    rv   r  z!ComputedBuffer.get_reduction_type#  r  rw   c                4    | j                                         S r   )r  r/  r  s    rv   r_  zComputedBuffer.is_no_op&  r  rw   c                    dS NTrx   r  s    rv   r}  zComputedBuffer.should_allocate)  r  rw   c                6    | j                             |          S )r  )r  r  r  s     rv   r  z!ComputedBuffer.constant_to_device,  s    y++F333rw   rC  rw  NNr  r  r  r  r   )r  rE  rF  rG  r  r:  r  ra  rB  r>  r  r  r  rA   r  r  rH  r  r  r  r_  r}  r  r  r  s   @rv   r  r    s        KKK	 	 	% % %* * * *  
 
 
 
2	% 	% 	% 	% 	%G G G' ' 'R% % % J J ]J8 RVBFq2 q2 q2 q2 q2f  !B !B !B \!BF. . .. . ., , ,  4 4 4 4 4 4 4rw   r  c                  T     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
	 	 dddZ xZS )TemplateBufferzt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t                                          d |           t                              |          | _        || _        t          j                            |           | _	        t          j        
                    |            d S N)rz   r  )r  r%  InputsKernelunwrap_storageinputsmake_kernel_renderrP   r   register_bufferrz   register_operation)r  r  r  r  r  s       rv   r%  zTemplateBuffer.__init__7  sp    d6222"11&99"4G++D11		""4(((((rw   c                .    |                      d          S )NT	normalize)r9   r  s    rv   ra  zTemplateBuffer.get_read_writes>  s    ''$'777rw   c                
   |                                  | j                                        fd}t          j        ||                                 d|          }t          d | j        D                       |_        |S )Nc                l    t          |          dk    sJ t          j         |           d          S )Nr   fake)r   rN   r  )r   rM  r  rz   s     rv   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummyE  s6    v;;!####9T775>>6:::rw   rx   r  c              3  b   K   | ]*}t          j        |                                          V  +d S r   )r4   r  r8  r  s     rv   rh  z5TemplateBuffer.extract_read_writes.<locals>.<genexpr>L  s5      XXq 4QZZ\\ B BXXXXXXrw   )	r8  r  r?  r4   r9   r   r-   r  rj  )r  r  r  depsr  rz   s       @@rv   r9   z"TemplateBuffer.extract_read_writesA  s    }}+**,,	; 	; 	; 	; 	; 	; /4==??B)
 
 
  XXDKXXXXX
rw   c                    dS r  rx   r  s    rv   r  z!TemplateBuffer.get_reduction_sizeO  r  rw   c                    d S r   rx   r  s    rv   r  z!TemplateBuffer.get_reduction_typeR  r  rw   c                    dS r[  rx   r  s    rv   r_  zTemplateBuffer.is_no_opU  r]  rw   c                    dS r  rx   r  s    rv   r}  zTemplateBuffer.should_allocateX  r  rw   Nr  r  r  r  c                2    |                                  dfd fS r  r&  )r  r  r  s      rv   r  z#TemplateBuffer.simplify_and_reorder[  s&      
 	
rw   r  r  )r  rE  rF  r  r%  ra  r9   r  r  r_  r}  r  r  r  s   @rv   r  r  1  s         
) ) ) ) )8 8 8          
 RVBF
 
 
 
 
 
 
 
 
rw   r  c                  6     e Zd Z	 	 d
d fdZddZdd	Z xZS )TritonTemplateBufferNmutated_inputsOptional[Iterable[IRNode]]c                    t                                          |||           | _        | _         g _        |t
          j        j        j        t
          j        j        j	        f}t          j        j        j        }||v sJ d| d|              j        d                                          xj         fd|D             z  c_        dS dS )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currenlty our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   c                L    g | ] }t          t                    |          !S rx   MutationOutputr7  )r   r  r   r  s     rv   r   z1TritonTemplateBuffer.__init__.<locals>.<listcomp>  s:       BEz&113==  rw   )r  r%  debug_extrar  outputsrg   rN   higher_orderflex_attentionflex_attention_backwardrP   r   current_noder  r  r   )
r  r  r  r  r  r  allowed_setr  r   r  s
   `       @rv   r%  zTritonTemplateBuffer.__init__j  s    " 	);<<<&,&*V% 	&5	&>K 7/6L+++ZkZZLZZ ,++[^..00FLL     IW   LLLL &%rw   r]   rk  c                    | j         S r   )r  r  s    rv   rm  z TritonTemplateBuffer.get_outputs  r+  rw   r{   c                ,    d| j          d| j         d}|S )NzTritonTemplateBuffer(layout=r.  r  )r  r  )r  r   s     rv   r  zTritonTemplateBuffer.__str__  s$    OT[OOD<LOOO
rw   r  )r  r  rv  ru  )r  rE  rF  r%  rm  r  r  r  s   @rv   r  r  i  ss         59" " " " " " "H          rw   r  c                  X     e Zd ZdZ fdZddZddZd Zdd	ZddZ	ddZ
ddZ xZS )ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    c                r    t                                                       || _        || _        || _        d S r   )r  r%  rz   r  input_nodes)r  rz   r  r  r  s       rv   r%  zChoiceCaller.__init__  s5    	&rw   r]   r  c               Z    |                                  }t          j        ||d|i          S )Nr   )to_callabler>   	benchmark)r  r   r~   algos       rv   r!  zChoiceCaller.benchmark  s,    !!$T4%>>>rw   r{   c                    t           r   r  r  s    rv   	call_namezChoiceCaller.call_name  r  rw   c                    t           r   r  r  s    rv   r   zChoiceCaller.to_callable  r  rw   c                    t           r   r  r  s    rv   hash_keyzChoiceCaller.hash_key  r  rw   rX   c                    t           r   r  r  s    rv   output_nodezChoiceCaller.output_node  r  rw   <Dict[str, Union[PrimitiveInfoType, List[PrimitiveInfoType]]]c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.rx   r  s    rv   	info_dictzChoiceCaller.info_dict  s    	rw   c                    dS )Nunsupported_choicerx   r  s    rv   autoheuristic_idzChoiceCaller.autoheuristic_id  s    ##rw   )r]   r  ru  )r]   rX   )r]   r*  )r  rE  rF  r  r%  r!  r$  r   r'  r)  r,  r/  r  r  s   @rv   r  r    s         ' ' ' ' '? ? ? ?" " " "" " "" " " "" " " "   $ $ $ $ $ $ $ $rw   r  c                      e Zd ZddZdS )TritonTemplateCallerBaser]   r   c                    t           r   r  r  s    rv   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  r  rw   N)r]   r   )r  rE  rF  r3  rx   rw   rv   r1  r1    s(        " " " " " "rw   r1  c                  n     e Zd ZdZd fdZedd            Zej        dd            Z	ddZ
ddZ xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    r  ra  r  List[IRNode]choice_timings'Callable[[], Dict[ChoiceCaller, float]]c                z    t                                          ||d            || _        d | _        || _        d S )N)r  r  r  )r  r%  _choice_timings_fn_choice_timingsoriginal_inputs)r  r  r  r7  r  s       rv   r%  zMultiTemplateBuffer.__init__  sB     	v$OOO"0DH%rw   r]   Dict[ChoiceCaller, float]c                P    | j         |                                 | _         | j         S r   )r;  r:  r  s    rv   r7  z"MultiTemplateBuffer.choice_timings  s(    '#'#:#:#<#<D ##rw   callerr1  c              #     K   t          |t          j        j        j                  sJ | j        |j        k    sJ | j        }|                                | _        	 d V  || _        d S # || _        w xY wr   )rb   rg   rh   select_algorithmTritonTemplateCallerr  r  r3  )r  r?  renders      rv   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  s      &%/"B"WXXXXX{fm++++("("?"?"A"A	-EEE&,D###fD#,,,,s   A) )	A2c                    t          |t          j        j        j                  sJ | j        j        |j        j        k    sJ | j        j        |j        j        k    sJ |                                | _	        d S r   )
rb   rg   rh   rA  rB  r  r   r   r3  r  )r  r?  s     rv   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller  sp    &%/"B"WXXXXX{6=#55555{!V]%99999"("?"?"A"Arw   Tuple[ChoiceCaller, float]c                `    t          | j        | j        j                  }|| j        |         fS )Nrb  )r  r7  r  )r  
min_choices     rv   get_min_choicez"MultiTemplateBuffer.get_min_choice  s0    ,$2E2IJJJ
D/
;<<rw   )r  ra  r  r6  r7  r8  )r]   r=  )r?  r1  )r]   rG  )r  rE  rF  r  r%  rK  r7  rI  rJ  rD  rF  rJ  r  r  s   @rv   r5  r5    s         	& 	& 	& 	& 	& 	& $ $ $ X$
 	- 	- 	- 	-B B B B= = = = = = = =rw   r5  c                  &     e Zd Zd fdZd Z xZS )CUDATemplateBufferworkspace_sizerq   templateCUDATemplatec                j    t                                          |||           || _        || _        d S r   )r  r%  rM  rN  )r  r  r  r  rM  rN  r  s         rv   r%  zCUDATemplateBuffer.__init__  s5     	);<<<, rw   c                "    | j         | j         ndS r+  )rM  r  s    rv   rt  z%CUDATemplateBuffer.get_workspace_size  s    &*&9&Et""1Lrw   )rM  rq   rN  rO  )r  rE  rF  r%  rt  r  r  s   @rv   rL  rL    sS        ! ! ! ! ! !M M M M M M Mrw   rL  c                       e Zd Z fdZ xZS )CppTemplateBufferc                j    t                                          |||           || _        || _        d S r   )r  r%  rN  choice)r  r  r  r  rN  rU  r  s         rv   r%  zCppTemplateBuffer.__init__	  s2    );<<< rw   r  rE  rF  r%  r  r  s   @rv   rS  rS    s8                rw   rS  c                  X    e Zd ZU ded<   d Zed             Zed             Zd Z	d Z
dS )	r  rk  r  c                   t                      }t          j        | j        D ]i}t	          |t
                    r"|                    fd|D                        9|                     |                                                     jt          fd| 	                                D                       }t          j
        ||t                                S )Nc              3  R   K   | ]!} |                                           V  "d S r   rr  )r   r   r  s     rv   rh  z/InputsKernel.get_read_writes.<locals>.<genexpr>  s5      BBqWWQZZ\\22BBBBBBrw   c              3  R   K   | ]!} |                                           V  "d S r   rr  )r   r  r  s     rv   rh  z/InputsKernel.get_read_writes.<locals>.<genexpr>  sF       :
 :
(+GGCLLNN##:
 :
 :
 :
 :
 :
rw   )rj  writesindex_exprs)r-   r4   r  r  rb   rc   updater  r8  rm  
ReadWrites)r  rj  inputr[  r  s       @rv   ra  zInputsKernel.get_read_writes  s    .8ll&[ 	5 	5E%&& 5BBBBEBBBBBBB		''%.."2"2334444/9 :
 :
 :
 :
/3/?/?/A/A:
 :
 :
 0
 0
 &"
 
 
 	
rw   c                   t          |t                    r|j        }t          |t                    r|j        }t          |t                    r/t          |t
                    st                              |          }t          |t                    r|                     |          S t          |t                    r|S t          |t          t
          f          s
J |            |S r   )rb   rX   r  r  r  r  rA  rB  unwrap_storage_for_inputTorchBindObjectr  r  r   s     rv   ra  z%InputsKernel.unwrap_storage_for_input&  s    a## 	Aa$$ 	Aa"" 	.:a+I+I 	.**1--Aa## 	3
 //222a)) 	H!fo677:::::rw   c                    g }| D ]S}t          |t                    rd |D             }nt                              |          }|                    |           T|S )Nc                B    g | ]}t                               |          S rx   )r  ra  r  s     rv   r   z/InputsKernel.unwrap_storage.<locals>.<listcomp>>  s&    III!\::1==IIIrw   )rb   rc   r  ra  r  )r  
inputs_newr   s      rv   r  zInputsKernel.unwrap_storage9  sm    
 	! 	!A!T"" =IIqIII 99!<<a    rw   c                    dS r  rx   r  s    rv   r\  zInputsKernel.is_externD  r  rw   c                    dS r  rx   r  s    rv   r:  zInputsKernel.num_readsG  r  rw   N)r  rE  rF  rG  ra  r  ra  rH  r  r\  r:  rx   rw   rv   r  r    s         
 
 
&   [$   \      rw   r  c                      e Zd Zd ZdS )	NopKernelc                    dS r  rx   r  s    rv   r_  zNopKernel.is_no_opL  r  rw   N)r  rE  rF  r_  rx   rw   rv   rj  rj  K  s#            rw   rj  c                  Z    e Zd ZdZed             Zed             Zed             Zd ZdS )ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                L
   |d                                          }|d                                         }t          |d                                                   }dg}||         g}d|cxk    rt	          |          k     sn J t          dt	          |                    D ]#}||                                         }	|                    ||                    t	          |	          t	          |          k    sJ ||                                         |k    sJ ||                                          |k    sJ t          t	          |                    D ]Q}
|
|k    r||
         |	|
         z   ||
<   t          j        j	        
                    ||
         |	|
                   ||
<   R|                    ||                    %t                              |          }t          t	          |                    D ]x}||         }t          |          r_|                                }t          |t                     r6t"                              |j        |j                  rt+          |          } nyt-          d |D                       }t          j        j        j        d         }t          |t                    sJ |du r(t-          d |D                       rt+          |          }t3          d t!          ||||          g           }t5          |          }g }t          t	          |                    D ]%}|                     ||         t8                              ||||         ||         d                    }|j                            |           t          ||         j        t@                    r ||         j        !                                }n||         j        }|"                                rbtG          ||                                          j$                  r6tK          |          s'|                    |&                                           't	          |          dk    rIt          j        '                    |tP          j)                  rt          j        *                    |           t          j        +                    |          |_,        | -                    |j                  |_        t          j        .                    |           |S )	Nr   r2   c              3  4   K   | ]}t          |          V  d S r   )r   r  s     rv   rh  z&ConcatKernel.create.<locals>.<genexpr>y  s+      -W-W1.CA.F.F-W-W-W-W-W-Wrw   Fc              3     K   | ]c}d |j         v oU|j         d                              t          j                  p*|j         d                              t          j                  V  ddS )r  r  N)r  r  rg   r  r  r   args     rv   rh  z&ConcatKernel.create.<locals>.<genexpr>}  s       <
 <
  SX --E<O-PP W8E?00u?U0VV	<
 <
 <
 <
 <
 <
rw   )r   r   r   r   rz   r  r  )r  )/r   r   rc   r   r   r   r  rP   r   r   rJ  r   r   r   r   rb   r  ra  r  r   r   r"   r  r  r~   rm  r  rP  r  r  r  r  r  r  is_input_bufferrH   rs   rG   rY  r  r5   FOREACHregister_operation_listr  rz   r  r  )r  r  r2  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strider   r  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        rv   r  zConcatKernel.createV  s   %%''q	##%%q	**,,--}oC''''#h--''''''q#f++&& 	. 	.A++--J  #///z??c(mm3333!9&&((E1111!9''))V33333x==))  88"*1+
1"=HQKK"#'"2"?"? Z]# #HQKK x}----&99(CCs6{{## 		 		Aq	A$Q'' K  88fmTT %C8$L$LME*--W-WPV-W-W-W*W*W'w+03,-----*e33 <
 <
 $<
 <
 <
 9
 9
3 ;8DDM$$	   	
 	
 	
 M**s6{{## 	C 	CA++q	  Cq!1;q> !   L  ''555&).(33 1"()."<"<">">"().  //11C6!9//11677C #<00C
  ? ? A ABBBx==1!4!4V^=S!T!TG++H555W44]CC"11-2FGG	""=111rw   c                    t          |t                    r|                     |j                  S t          |j        j        t
                    ot          |j        t                     S r   )rb   rX   can_realize_into_without_copyr  r  r   ExternKernelAlloc)r  rM  s     rv   r  z*ConcatKernel.can_realize_into_without_copy  sa    c9%% 	?44SX>>>#(/>:: 
:H'D
 D
 @
 	
rw   c                b   t          |t                    s1t          |          r"t          |          \  }}t          ||          }t          |t                    s
J |            t          |t                    r|                     |j        |          S t          |t                    r`|                                 t          |j        d          sJ | 
                    |          r t          |          |j        _        |j        S t                              |                                |                                |                                d t%          |                                |                                          D                       }|                     ||          S )Nr  c                ^    g | ]*\  }}t           j        j                            ||          +S rx   rK  rL  s      rv   r   z-ConcatKernel.realize_into.<locals>.<listcomp>  sA       Aq  --a33  rw   r  )rb   r  r   r  rX   rP  r  r  r2  rT  r  r,  r  r  r  r   r   r>  r   r   )r  rM  rN  r  r  pws         rv   rP  zConcatKernel.realize_into  s   
 #// 	7$S)) 7"7"<"<%gv66#//44444c9%% 	3##CHc222c:&& 	 KKMMM38X.....0055  "1#"6"6x>>##--//__&& ??  	  
 
 C(((rw   c                    dS r  rx   r  s    rv   r}  zConcatKernel.should_allocate  r  rw   N)	r  rE  rF  r  r  r  r  rP  r}  rx   rw   rv   rm  rm  P  s         
 X X [Xt 
 
 [
 ) ) [)>    rw   rm  c                      e Zd ZU dZded<    ej        e          Zded<   dZ	ded	<   dZ
d
ed<   dZd
ed<    ej        e          Zded<   dZded<   dZded<   dZded<    ej        e          Zded<    ej        e          Zded<   	 	 	 	 	 	 	 dE fd	ZdFdZdGdZd ZdHd!Zd" Zd# Zd$ ZdIdJd%ZdKd&Zd' Zed(             Ze dLd*            Z!e d+             Z"e d,             Z#e d-             Z$e 	 	 	 dMdNd2            Z%e dHd3            Z&e dHd4            Z'e d5             Z(e d6             Z)e d7             Z*d8 Z+dIdOd;Z,d< Z-d= Z.dHd>Z/d? Z0d@ Z1dA Z2dGdBZ3dPdDZ4e4Z5 xZ6S )QrA  rx   zTuple[Any, ...]constant_args)default_factoryzDict[str, Any]r   NzOptional[ReinterpretView]output_viewr   python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[List[Dict[str, Any]]]arg_propertiesz#Optional[Dict[str, Dict[str, Any]]]kwarg_propertiesz"Dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszList[MutationOutput]mutation_outputsc                `   t                                          |||           || _        |r|ni | _        || _        |
| _        |                     |           |                     |           |	| _        | 	                                 i | _
        g | _        t          j        j        | _        d S r   )r  r%  r  r   r  r  set_cpp_kernel_nameset_python_kernel_namer  collect_arg_kwarg_propertiesr  r  rP   r   r  fx_node)r  rz   r  r  r  r   r  r  r  r  r  r  s              rv   r%  zExternKernel.__init__  s     		
 	
 	

 + &.ffB&&  111##$6777-J*))+++!# "w+rw   r]   rk  c                    | g| j         S r   )r  r  s    rv   rm  zExternKernel.get_outputs  s    -t,--rw   rn  c                    t                      S r   r,   r  s    rv   rp  z%ExternKernel.get_unbacked_symbol_defs  rq  rw   c                   t          | j        t          j        j                  rd | j        j        j        D             n*d t          t          | j	                            D             | _
        t          | j        t          j        j                  rd | j        j        j        D             ni | _        t          | j        t          j        j                  r)| j        s$d | j        j        j        D             | _        d S d S d S )Nc                F    g | ]}|j         	|j        |j        |j        d S ))rz   rs   r  )
kwarg_onlyrz   	real_typer  r  s     rv   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>  sH        |FK%&_   rw   c                    g | ]}i S rx   rx   r  s     rv   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>'  s    666"666rw   c                8    i | ]}|j         |j        |j        d S ))rs   r  )rz   r  r  r  s     rv   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<dictcomp>*  s8        qOO  rw   c                *    g | ]}|j         	|j        S rx   r  rz   r  s     rv   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>7  s1     2 2 2al22 2 2rw   )rb   r  rg   _ops
OpOverload_schema	argumentsr   r   r  r  allarg_propertiesr  r  s    rv   r  z)ExternKernel.collect_arg_kwarg_properties  s5    $*EJ,ABB
7   )1;    76eC$4$455666 	$ $*EJ,ABB	  )1;   
  	 t')>??	6	2 2 $ 0 8 B2 2 2D...		 	 	 	rw   Fc                   t          |t          t          f          sJ t          |t                    rt          |          }| j        s
J d            t	          |          }t	          | j                  }||k     r|t
                              d| j        ||z
             t          ||          D ]G}| j        |         d         }|	                    ||v r||         n| j        |         d                    H|S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.rz   r  )
rb   rc   rd   r  r   r  r  r  r   r  )r  r~   r   convert_val_to_strn_args
n_pos_argsr   arg_names           rv   fill_non_provided_argsz#ExternKernel.fill_non_provided_args;  s    $u.....dE"" 	::D"UU$UUUUT,--
 JII^ V#	   6:..  .q1&96)) 8$$,Q/@   
 rw   c                    t          | j        t                    r*|                                  |                                  d S d S r   )rb   r  r   apply_constraintr  r  s    rv   r  zExternKernel.decide_layout]  sJ    dk>22 	!!!###     	! 	!rw   c                ^    t          | |          \  }}|r|                    |           d S d S r   )rF   	writeline)r  wrapper
origin_strdetailed_origin_strs       rv   codegen_commentzExternKernel.codegen_commentb  sC    *=dG*L*L'
' 	*j)))))	* 	*rw   c                    t           r   r  r  r  s     rv   codegenzExternKernel.codegeng  r  rw   c                x   || _         d | _        d | _        d | _        t          j        j        r$t          | j        t          j
        j                  sd S | j        }| j         n|j        dk    rR|j        dk    r |j                            d          d         n|j                            dd          }d| d| _         n|j        j        | _         dd	lm} |j        j        | _        | j                             d
d           d| j         | _        	  ||          | _        d S # t,          $ r d| _        Y d S w xY w)Natenr  .r   r   z
at::_ops::z::callr2   get_cpp_op_schema::r  )r  cpp_kernel_overload_namecpp_kernel_keycpp_op_schemarP   r   cpp_wrapperrb   r  rg   r  r  	namespace_overloadnamer  r  replacer  rz   codegen.wrapperr  overload_namer  )r  r  r  opnamer  s        rv   r  z ExternKernel.set_cpp_kernel_namej  sw   .(,%"!w" 	*ej3+
 +
 	 F!'6)) +y88 O))#..q1100c:: 
 (CF'B'B'B$$'-~':$ 	766666(.(D%!%!5!=!=dC!H!Hjj4Khjj	$!2!26!:!:D 	$ 	$ 	$!#D	$s   D$ $D98D9c                    || _         |d S | j        }|d S t          |t          j        j                  rd|j         | _         d S |j                            dd           d|j         | _         d S )Nztorch.ops.higher_order.z._ops.z.ops.r  )	r  r  rb   rg   r  HigherOrderOperatorr  rE  r  )r  r  r  s      rv   r  z#ExternKernel.set_python_kernel_name  s    "4)F!>D
 >?? 	&Q&Q&QD### $,,Xw??SS&/SS ###rw   c                    t           j        j        r<t          j        r)t           j        j                            | j                  n| j        n| j        S r   )	rP   r   r  r3   abi_compatibler  get_c_shim_func_namer  r  r  s    rv   get_kernel_namezExternKernel.get_kernel_name  sM     w")(*$99$:NOOO)) (	
rw   c           	     F   t                               |                                 |                                 |                                 |                                 |                                 |                                           }|                                 |S )N)r   r   r|  r~  rS  r   )	r  r  r   r   r>  r   rU  r
  r2  )r   r  s     rv   
copy_inputzExternKernel.copy_input  sx    <<>>++--]]__::<<))++oo''  
 
 	

	rw   iTuple[Any, List[Any], List[Any], Callable[[Any, Any], Any], Optional[Dict[sympy.Symbol, pytree.KeyPath]]]c                Z    ||d}t          j        |          \  }g g }g }|D ]}                    t          |t                               d         r|                    |           Ht          |t
          j                  r+t          j        j	        j
                            |d           }|                    |           fd}	 fd|D             }|D ]"}
t          |
          rt          |
d           #g }|D ]}
t          |
t                    sb|
                                t          j        j        v r=|                    t          j        j        |
                                                    zt          |
t                    sb|
                                t          j        j        v r=|                    t          j        j        |
                                                    |                    t%          |
d                      |	||          \  }} ||i |}d }t          j        j
        x}rNt)          |t          j        |           t-          ||t          j        j                            d	                    }t          |t2          t4          f          s|gn|}|D ]i}t          |t6          j                  rM|j        rFd
}t          j        j        j                            dd           x}r| d| }|t          j        _        j||||	|fS )N)r~   r   r  )r  c                ^   g }t          |           }t          |          }D ]I}|r#|                    t          |                     '|                    t          |                     Jt          j        |          }|                    dg           |                    di           fS )Nr~   r   )iterr  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsrW  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          rv   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo..J!"566N* 8 8	 8MM$z"2"23333MM$~"6"67777%fi88A55$$aeeHb&9&999rw   c                :    g | ]}                     |          S rx   rB  r   r   r  s     rv   r   z/ExternKernel.process_kernel.<locals>.<listcomp>  s'    AAAs((++AAArw   Tr  )r   r  zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 )r  tree_flattenr  rb   rY   rm   r   rP   r   r   r   create_symintnoder   r  r  r8  	constantstorchbind_constantsr   	fake_moder)   r  r&   r  r  rc   rd   rg   Tensor	is_sparsedisable_cudagraphs_reason)r  r  r~   r   binded_args	args_flattensor_argsnon_tensor_argsrs  r  r   example_argsnew_args
new_kwargsexample_outputr  r   example_out_lir   msgr  r  r  s   `                    @@rv   process_kernelzExternKernel.process_kernel  si     $v66%2;??	9%' 	, 	,C  C!8!8999R  ,""3''''c5:.. W'*4FFsQUFVVC&&s++++
	: 
	: 
	: 
	: 
	: 
	: BAAA[AAA  	6 	6A$Q'' 6%a5555
 JL
  	L 	LA a** Lqzz||qw?P/P/P##AG$5ajjll$CDDDDq(++LJJLLAG$???##AG$?

$MNNNN##$5aT$J$J$JKKKK-~lOLL*8Z88JN--9 	Iq~~FFF 9>1>+>+B+B5+I+I! ! ntUm<< ^ 	
   	8 	8A!U\** 8q{ 8]"#'"6";"?"?t"T"TT; A @@;@@C471 
 	
rw   c           
     T   t          |t                    sJ t          |t                    r|S |                                }t          j                            |                                          }|J |                                }|d|j	        v rt          |j
        t                    r|j	        d                             t          j                  s+|j	        d                             t          j                  r5|                    t#          |                                                     n|                                 t)          j        |                                d          \  }}|d         } |                                |          }t          j        j                            ||          }t          j        j                            ||          }	t          j        j                            ||          }
t7          ||	          |
z   }||k    r$t8                              d|	|
|           t<          t          |j        tA          |!                                |"                                |                                |	|
          	          S )
z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        Nr  rq  r  rV   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sr  )r  r  )#rb   r  r  r  rP   r   r?  r8  rU  r  r  r   r  rg   r  r  rk  r"   r   r  r4   r  r?  r   r  stride_vars
offset_varrI   r  r  r  r  r  r   r   )r  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr  r  r   r  r  expecteds               rv   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view  s    !X&&&&&a)) 	H g  !7!7!9!9:: # 3 3 5 5 "-.333=/@@ 4 &*51??"'"5 @   4 )-e4BB"'"8 C   4 77.}/E/E/G/GHH    '')))!-!@JJLL"
 "
 "

J  ]
   ,, 55eZHH'"..ujAA!,,UJ??Z11F:HIIR	   &%||~~kkmmZZ\\  	
 	
 	
 		
rw   c                H   |t                      S t          |t          j        t          j        j        j        t          f          rt          |          S t          |t                    r]t          j                            t          j        |j        |                                |                                                    S t          |t$                    r|S t          |t&                    r|                     |j                  S t          |t,                    r:t-          |                     |j                  |                                          S t          |t0                    r[|                                 t5          |                                          r&	 |                     |          S # t:          $ r Y nw xY wt          |t<                    r|                                 |S t          |t>                    r|S |                      |          S )N)r   r   )!r  rb   rm   r   rn   ro   rp   rq   r  r  rP   r   add_tensor_constantrg   tensorr   r   r   r  rX   rB  r  r  r   r  r2  r   r  r   r  r  rb  r  rc  s     rv   rB  zExternKernel.realize_input\  s   9')))a%*ek&9&A3GHH 	,(+++a"" 	7..QWAKKMM!,,..QQQ   a(( 	Ha## 	-$$QV,,,a)) 	N"3#4#4QV#<#<allnnMMMa"" 	IIKKK$Q]]__55 ::1===*   Da$$ 	IIKKKHa)) 	H~~a   s   (F= =
G
	G
c                    t          |          rHt          |                                          dk    r|S |                                D ]}|dk    r|c S |                     |          S r  )r   r   r;  r  )r  r   r   s      rv   require_stride1zExternKernel.require_stride1{  sr     ## 	1<<>>""a'',,..  Q;;HHH ~~a   rw   r   Optional[Sequence[int]]r  Optional[Sequence[_IntLike]]c                ~	   ||J |                                 dk    r|S t          |          rt          |                                t                    r@|                                j        }t          |                                t                    @t          |                                t                    r|rnt          |ddt          ||          rHt          t          j        j                            |                                j                            n||           |S t          |ddd ||           |S t          |                                t                    rg|r'|                                                    |          s<|r<t#          ||                                j        |                                          r|S t          |                                t&                    rt          |                                                                t                    rt+          d          t          |                                                                t                    r|r9|                                                                                    |          sN|rNt#          ||                                                                j        |                                          r|S t          |t,                    rg|r'|                                                    |          s<|r<t#          ||                                j        |                                          r|S t          |t.                    rt          |j        t2                    rt          |j        t4                    st          |                                          rt          |                                j        t8                    se	 |                     |j                  |_        |r|                     |||          S |r|                     |||          S n# t@          $ r Y nw xY w| !                    |          }t          |dd|||           |rt          ||          sJ |S )Nr   TF)r  r  r  r  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr  )"r)  r   rb   r   r,  r-  r   r  r  r   rP   r   r   
size_hintsr   r  r  r7  r   r=  rB  r'  r  rX   r  r  r  r  r  r   require_stride_orderrequire_exact_stridesr  r  )r  r   r   r  r  s        rv   require_strideszExternKernel.require_strides  s     M$=$=$=;;==AH ## @	Q\\^^_== (LLNN' Q\\^^_== (!,,...99 = ! *#(- >aGG&#%5G,778MNN& & & #&3
 
 
 
 H *#(-%)&3&3    HALLNNK88 <<>>;;EBB " 2%q||~~'<ajjll 	 ALLNN,FGG allnn88::NKK (b     : : < <kJJ  ||~~99;;MMeTT & 6)LLNN6688?JJLL 	 H a%% 				||~~77>>		 		 .!1<<>>#8!**,, 			 Hq)$$	168,,	 qv77	 &ammoo66		
 q}}35FGG	88@@ 335 4    # 44= 5    '    NN1!''	
 	
 	
 	
  	@5a?????s   8Q/ Q/ /
Q<;Q<c                2    |                      |||          S )N)r  r  r  )r  r   r  r  s       rv   r  z"ExternKernel.require_exact_strides   s&    ""]- # 
 
 	
rw   c                2    |                      |||          S )N)r   r  r  )r  r   r   r  s       rv   r
  z!ExternKernel.require_stride_order  s    ""1E"OOOrw   c                8    |                      |t                    S r   )r
  r  rc  s     rv   require_channels_lastz"ExternKernel.require_channels_last
  s    ''+<===rw   c                8    |                      |t                    S r   )r
  r  rc  s     rv   require_channels_last_3dz%ExternKernel.require_channels_last_3d  s    ''+=>>>rw   c                    |                      |t          t          t          t	          |                                                                                  S r   )r
  rc   r  r   r   r   rc  s     rv   require_contiguouszExternKernel.require_contiguous  s@    ''4s1::<<?P?P9Q9Q0R0R+S+STTTrw   c                    d S r   rx   r  s    rv   r  zExternKernel.apply_constraint  r|  rw   r  Optional[List[str]]c                   t           j        j        r0g }d }|rG| j        r@t	          | j                  t	          |          k    s
J d            d | j        D             }t          | j                  D ]\  }}|5|                    ||                   }|r|                    d          nd }nXt	          | j                  |z   }| j        r8|t	          | j                  k     r | j        |                             d          nd }|	                    t           j        j
                            ||                     |S t          t           j        j
        j        | j                  S )NzDnames passed to codegen_const_args does not match self.constant_argsc                :    i | ]}|                     d           |S rf  r  rr  s     rv   r   z3ExternKernel.codegen_const_args.<locals>.<dictcomp>%  s1     * * *-0CGGFOOS* * *rw   rs   )rP   r   r  r  r   r  r   r  r  r  r  val_to_arg_strr  )	r  r  rW  name_to_arg_propertiesr   r   proptype_r   s	            rv   codegen_const_argszExternKernel.codegen_const_args  s   7 	PF
 &*" , 4-..#3 3   Y  * *484G* * *& "$"455  1)5155eAh??D04>DHHV,,,$EEdk**Q.C  ."36T=P9Q9Q3Q3Q +C044V<<<! 
 G(775AA    Mqw+:D<NOOOrw   c                   g }t          | j                  D ]\  }}t          |t                    r;d |D             }dd                    |           d}|                    |           Vt          j        j        r| j	        r|t          | j	                  k     s
J d            | j	        |                             d          }|                    t          j        j                            ||                     |                    |                                           |                    |                                            |S )Nc                6    g | ]}|                                 S rx   r6  r  s     rv   r   z-ExternKernel.codegen_args.<locals>.<listcomp>?  s$    :::1,,..:::rw   [r.  ]z-Invalid access to ExternKernel.arg_propertiesrs   )r   r  rb   rc   r  r  rP   r   r  r  r   r  r  r  r6  r  r  )r  r~   r   r   r  r6  r  s          rv   codegen_argszExternKernel.codegen_args;  se   dk** 	7 	7DAq!T"" 7:::::$;		%(8(8$;$;$;!-....7& 7. G1s+8 8 4 4 4F4 4 4 !/266v>>EKK,;;u     KK 3 3 5 56666D++--...rw   c                   || j         v r| j                             |          S | j        rG| j                            |          r-| j                            |                              d          S t          | d          )Nr  z not in self.allarg_properties)r   r  r  r'  )r  r  s     rv   get_kwargs_valuezExternKernel.get_kwargs_valueR  s    t{"";??8,,,! 	Nd&<&@&@&J&J 	N)--h77;;OLLL H!L!L!LMMMrw   c                   t           j        j        rg }| j        D ]}|r|dk    r|                     |          }t          |t          j                  r|                    |           P| j	        r6|| j	        v r-| j	        
                    |          
                    d          nd }|                    t           j        j                            ||                     n#d | j                                        D             }|S )Nr   rs   c                f    g | ].\  }}| d t           j        j                            |           /S r  rP   r   r  r  )r   kr  s      rv   r   z/ExternKernel.codegen_kwargs.<locals>.<listcomp>q  sM       Aq ??qw+::1==??  rw   )rP   r   r  r  r'  rb   rm   r   r  r  r  r  r  r   r  )r  skip_outr   r  r  r  s         rv   codegen_kwargszExternKernel.codegen_kwargsZ  s3   7 	F >   E 1 1))(33a,, MM!$$$$  1"6>$BX6X6X .228<<@@HHH! 
 MM,;;u    (  K--//  F rw   c           	        t           j        rt          j        j        st          |                                           dk    rd S t          j        j                            |                                           }t          j        j                            | 	                                          }|
                    d|                                  d| d| d           d S d S d S )Nr   zassert_size_stride(r.  r  )r3   size_assertsrP   r   r  rL   r   r  codegen_shape_tupler;  r  r8  )r  r  r   r   s       rv   codegen_size_assertsz!ExternKernel.codegen_size_assertsw  s     	qw': 	T]]__--227';;DMMOOLLDW)==doo>O>OPPFJdmmooJJJJJJJ    	 	 	 	rw   c                ^    |                                  }|                                 }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r;  )r  _sizer  s      rv   get_group_stridezExternKernel.get_group_stride  s/     //##r{G##rw   c           	     f   t           j        j        |                                 }|                                 }fd|D             }d t          t          |                    D             t          t          t          |                    |j        d          }d t          |          D             fdt          t                              D             }fd|D             | 
                                } |          }t           j        j                            ||g          \  }}}	t          d          \  }
t          t           |fd	|D                                           }t          t!          j        |          |          }|t%          |          fS )
zC
        Manually get canonicalization of the output index
        c                :    g | ]}                     |          S rx   )r   )r   r   r   s     rv   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s'    :::Q8%%a((:::rw   c                2    g | ]}t          d |           S )d)rJ   r  s     rv   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s'    MMMa(Q11MMMrw   T)rc  rV  c                    i | ]\  }}||	S rx   rx   r   s      rv   r   z-ExternKernel.canonicalize.<locals>.<dictcomp>  s    BBBxsC#sBBBrw   c                     g | ]
}|         S rx   rx   r   s     rv   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s    777q777rw   c                     g | ]
}|         S rx   rx   )r   r   r  s     rv   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s    333jm333rw   cc                &    g | ]} |          S rx   rx   )r   r   add_vars     rv   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s!    3R3R3R1GGAJJ3R3R3Rrw   )rP   r   r   r   r;  r   r   r  r  r   r?  r  r:   re   r   rM   rm   r  rd   )r  r  r  index_orderr   r  r   	new_sizesr   r  r   replacementr>  r  r   r   s               @@@@rv   canonicalizezExternKernel.canonicalize  s   
 7#//##::::':::MM5U;L;LMMM
U3w<<00g6ISWXXXBB9[+A+ABBB7777E#f++$6$67773333U333
##%%
##$%G$4$D$Dw%
 %
!	7E !%%
73z773R3R3R3R	3R3R3R+S+STTUU5<..<<eI&&&&rw   c                    t                      }| j        D ]}|t          |          z  }| j                                        D ]}|t          |          z  }|S r   )r-   r  maybe_free_unbacked_symbolsr   rf   )r  r  rs  s      rv   rB  z%ExternKernel.get_unbacked_symbol_uses  sk     '1ll% 	2 	2C,S111AA;%%'' 	2 	2C,S111AArw   r{   c                     t           dd           }d|g}| fdt          j                   D             z  }|                    d j                                        |          S )Nr  zpython_kernel_name=c                N    g | ]!}|j          d t          |j                    "S r  )rz   r   )r   fieldr  s     rv   r   z(ExternKernel.__str__.<locals>.<listcomp>  sE     
 
 
 z77GD%*5577
 
 
rw   r  )r   dataclassesfieldsr  rS  r  )r  kernel_namer  s   `  rv   r  zExternKernel.__str__  s    d$8$??1+11
 	 
 
 
 
$+D11
 
 
 	
 	8D$488999u%%%rw   rx   NNNNrx   Nrv  rw  r*  r   r  r   )r  r   )r]   r  )NNF)r   r  r  r  )r  r  ru  )7r  rE  rF  r  rG  rH  rG  re   r   r  r  r  rc   r  r  r  r  r  r  r%  rm  rp  r  r  r  r  r  r  r  r  rH  r  r  r  r   rB  r  r  r  r
  r  r  r  r  r  r%  r'  r-  r1  r4  rB  rB  r  r  r  r  s   @rv   rA  rA    sU        %'M''''.[.tDDDFDDDD-1K1111(,,,,,%)O)))) 4E;3D4 4 4!    
 	      6:N9999<@@@@@<MK<M= = =     .?[->t-T-T-TTTTT &(, , , , , ,<. . . .        D       D! ! !
* * *
" " "$$ $$ $$ $$ $$L   	
 	
 	
 
 
 \
 ^
 ^
 ^
 [^
@ A
 A
 [A
F ! ! [!< ! ! [!  *.6:x x x x [xt 
 
 
 [

 P P P [P > > [> ? ? [? U U [U   P  P  P  P  PD  .N N N   :	 	 	$ $ $' ' '>   
& 
& 
& 
& HHHHHrw   rA  c                  :     e Zd Zd Z	 	 	 	 	 	 	 d fd	Zd Z xZS )ExternKernelOutc                   |                      |           g |                                 |                     d          }|                                 }t          j        j        r| j        dk    rt          j	        rdnd}n|                                 }|
                    ||                                 | j        r| j                                        nd |           d S )NT)r,  ztorch::inductor::_mm_plus_mmaoti_torch__mm_plus_mm_outz torch::inductor::_mm_plus_mm_out)r  r%  r-  r  rP   r   r  r  r3   r  generate_extern_kernel_outr6  r  )r  r  r~   rJ  s       rv   r  zExternKernelOut.codegen  s    W%%%J""$$Jt':':D':'I'IJ**,,G	1$(FFF
 (8,,7 K ..00K**""$$484DND..000$		
 	
 	
 	
 	
rw   rx   Nc
                   t                                          d ||                     |          ||pi d ||||	
  
         t          j                            |           | _        t          j                            |            d S r   r  r%  r  rP   r   r  rz   r  )r  r  r  r  r   r  r  r  r  r  r  s             rv   r%  zExternKernelOut.__init__  s     	''Lb)	
 	
 	
 G++D11		""4(((((rw   c                    dS r  rx   r  s    rv   r}  zExternKernelOut.should_allocate  r  rw   rK  )r  rE  rF  r  r%  r}  r  r  s   @rv   rN  rN    sr        
 
 
6 &() ) ) ) ) )6      rw   rN  c                        e Zd Zd fdZ xZS )RandomSeedscountrq   r   rz  c                   t          j        t           j                  }t                                          t          |t           j        |g          g |j        |j        |ggdt          j	        rdndt          j        j                   d S )Nr  zaten.randint.low_outzat::_ops::randint_low_out::callzat::randint_out)r  r  r  r  r  r  )rg   r  r  r  r%  r  r  r  r3   r  r  randintlow_out)r  rW  r   limitsr  s       rv   r%  zRandomSeeds.__init__  s    U[))kW  
 !:vzE7;5
 $#==", 	 	
 	
 	
 	
 	
rw   )rW  rq   r   rz  rV  r  s   @rv   rV  rV    s=        
 
 
 
 
 
 
 
 
 
rw   rV  c                  >     e Zd Zd Z	 	 	 	 	 	 d fd	Zd Zd Z xZS )r  c                0   |                      |           g |                                 |                                 }t          j        j                            | |           t          | j        t                    r| 
                    |           d S d S r   )r  r%  r-  rP   r   r  generate_extern_kernel_allocrb   r  ra  r1  r  r  r~   s      rv   r  zExternKernelAlloc.codegen  s    W%%%=""$$=t':':'<'<=	99$EEEdk6** 	/%%g.....	/ 	/rw   rx   Nc	                   t                                          d ||                     |          ||pi d ||||
  
         t          j                            |           | _        t          j                            |            d S r   rS  )
r  r  r  r  r   r  r  r  r  r  s
            rv   r%  zExternKernelAlloc.__init__  s     	''Lb)	
 	
 	
 G++D11		""4(((((rw   c                    dS r[  rx   r  s    rv   r}  z!ExternKernelAlloc.should_allocate3  r]  rw   c                    t           r   r  r  s    rv   r  z"ExternKernelAlloc.apply_constraint6  r  rw   )rx   NNNrx   N)r  rE  rF  r  r%  r}  r  r  r  s   @rv   r  r    s~        / / / &() ) ) ) ) )4  " " " " " " "rw   r  c                  8     e Zd ZdZd	 fdZd
dZd Zd Z xZS )r  zP
    An output buffer that represents the mutation of a pre-existing buffer
    mutating_noderM  c                   t                                          d |           |                                }t          j                            |           |g| _        || _        t          j                            |           | _	        d S r  )
r  r%  r8  rP   r   r@  mutation_namesrd  r  rz   )r  r  mutated_noderd  mutated_node_namer  s        rv   r%  zMutationOutput.__init__?  sw    d6222(1133	##$566601(5G++D11			rw   r]   c                    | j         S r   )rd  r  s    rv   r  zMutationOutput.get_defining_opG  s    !!rw   c                    | j         S r   )rf  r  s    rv   rw  z!MutationOutput.get_mutation_namesJ  rC  rw   c                    dS r[  rx   r  s    rv   r}  zMutationOutput.should_allocateM  r]  rw   )rd  rM  r  )	r  rE  rF  r  r%  r  rw  r}  r  r  s   @rv   r  r  :  sy         2 2 2 2 2 2" " " "# # #      rw   r  c                  N     e Zd Zd Zd Zd fdZddZ fdZdd	ZddZ	 xZ
S )UserDefinedTritonKernelc                    ddl m} ddlm} |                    | j                  }g }t          ||          r|j        }|j        }||fS )Nr   )	Autotuner)kernel_side_table)	triton.runtime.autotunerro  *torch._higher_order_ops.triton_kernel_wraprp  
get_kernel
kernel_idxrb   configsr   )r  ro  rp  r  ru  s        rv   get_kernel_and_configsz.UserDefinedTritonKernel.get_kernel_and_configsR  sj    666666PPPPPP"--do>>fi(( 	nGYFwrw   c                                                      \  }}|                    || j                  \  }} fd j        D             }g }t	           j                  D ];\  }}	|j                            |	          |j        v r|                    |           < 	                    |           |
                    || j        |||           d S )Nc                :    g | ]}                     |          S rx   )r'  )r   r+  r  s     rv   r   z3UserDefinedTritonKernel.codegen.<locals>.<listcomp>e  s4     
 
 
)*D!!!$$
 
 
rw   )rv  !define_user_defined_triton_kernelr   r  r   	arg_namesr   
constexprsr  r  #generate_user_defined_triton_kernelgrid)
r  r  r  ru  new_nametriton_metaraw_argsconstexpr_indicesr   kwargs
   `         rv   r  zUserDefinedTritonKernel.codegen^  s
   5577 !( I IGT[!
 !
+
 
 
 
.2.P
 
 
 #D$FGG 	. 	.JC%%e,,0AAA!((--- 	W%%%33h	7KAR	
 	
 	
 	
 	
rw   r]   rn  c                n    t                                                      t          | j                  z  S r   )r  rB  r(   r}  r  s    rv   rB  z0UserDefinedTritonKernel.get_unbacked_symbol_usesw  s+     ww//114I$)4T4TTTrw   c                    t                      S r   r,   r  s    rv   rp  z0UserDefinedTritonKernel.get_unbacked_symbol_defs|  rq  rw   c                   g }i }g }                                 D ]|\  }}t          |t                    rHt                                                   |                    }	|                    |	           |	||<   b|                    |           |||<   }t          |          dk    sJ |d                                          _	        t                                          d t           j	                  |t          |          |           | _        | _                                         \  }
}fd|
j        D              _        ddlm} t          |          dk    r|d         j        ni }fd ||
i |          D              _         fd j        D              _        t0          j                                        d S )Nr   c                    g | ]}|v |	S rx   rx   )r   rs  kernel_argss     rv   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>  s*     .
 .
 .
sk/A/AC/A/A/Arw   )identify_mutated_tensorsc                     g | ]
}|         S rx   rx   )r   rc  r  s     rv   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>  s.     
 
 
 
 
 
rw   c                V    g | ]%}t          t          j                  |          &S rx   )r  r7  r   )r   r  r  s     rv   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>  s?     !
 !
 !
 :dk22C>>!
 !
 !
rw   )r  rb   rX   r  ra  rB  r  r   r   r   r  r%  r7  rd   rt  r}  rv  rz  r  rr  r  r   mutable_argsr  rP   r   r  )r  rt  r}  r  r  r   r  r+  r  r   r  ru  r  autotuned_kwargsr  s   `  `          rv   r%  z UserDefinedTritonKernel.__init__  s,   %%'' 	 	DAq!Y''  99$:L:LQ:O:OPPa   q		$$Q'''q		6{{aQi**,,t{##-  	
 	
 	
 %	5577.
 .
 .
 .
!+.
 .
 .
* 	XWWWWW03Gq0@0@71:,,b
 
 
 
//;;;*:; 
 
 
!
 !
 !
 !
(!
 !
 !
 	
""4(((((rw   rk  c                *    t          | j                  S r   )rc   r  r  s    rv   rm  z#UserDefinedTritonKernel.get_outputs  s    D)***rw   rz  c                    | j         S r   r  r  s    rv   r   z"UserDefinedTritonKernel.get_device  r  rw   rw  rv  )r]   rz  )r  rE  rF  rv  r  rB  rp  r%  rm  r   r  r  s   @rv   rm  rm  Q  s        
 
 

 
 
2U U U U U U
   .) .) .) .) .)`+ + + +       rw   rm  c                  <     e Zd ZdZd Zd Zd Zd	dZ fdZ xZ	S )
InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   d | j         D             \  }t          j        j        rpt          j        rd|                    |                                  d| dd                    t          t          | j                             d|j                    d S |                    |                                  d| dd                    t          t          | j                             d|j                    d S )Nc              3  >   K   | ]}|                                 V  d S r   r"  r   r   s     rv   rh  z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s.      ;;!##%%;;;;;;rw   r  r.  z, NULL)r  )r  rP   r   r  r3   r  r  r  r  r  reprr  ending)r  r  r   s      rv   r  z InplaceBernoulliFallback.codegen  s   ;;t{;;;7 		6#8 		 ''))rrArr3tTEW;X;X1Y1Yrrbibprr     ''))llAll3tTEW;X;X1Y1Yll\c\jll    rw   c                    dS r[  rx   r  s    rv   r}  z(InplaceBernoulliFallback.should_allocate  r]  rw   c                B    | j         d                                         gS r+  r  r8  r  s    rv   rw  z+InplaceBernoulliFallback.get_mutation_names      A''))**rw   r]   rn  c                    t                      S r   r,   r  s    rv   rp  z1InplaceBernoulliFallback.get_unbacked_symbol_defs  rq  rw   c                   t                                          d t          |                                          |                     |g          ||           t
          j                            |                                           t
          j        	                    |           | _
        t
          j                            |            t          j        s	d| _        d S d S )Nr  zat::native::bernoulli_)r  r%  r7  r   r  rP   r   r@  r8  r  rz   r  r3   r  r  )r  r  r   r  r  s       rv   r%  z!InplaceBernoulliFallback.__init__  s    q||~~&&$$# 	 	
 	
 	
 	
##AJJLL111G++D11		""4((($ 	<#;D   	< 	<rw   rw  
r  rE  rF  r  r  r}  rw  rp  r%  r  r  s   @rv   r  r    s             + + +   < < < < < < < < <rw   r  c                  V     e Zd ZdZd Zd Zd ZddZ fdZe	ddd            Z
 xZS )InplaceCopyFallbackr  c                b    |                                  \  }}}|                    ||           d S r   )r%  codegen_device_copy)r  r  rN  rM  non_blockings        rv   r  zInplaceCopyFallback.codegen  s6    #'#4#4#6#6 c<##C-----rw   c                    dS r[  rx   r  s    rv   r}  z#InplaceCopyFallback.should_allocate  r]  rw   c                B    | j         d                                         gS r+  r  r  s    rv   rw  z&InplaceCopyFallback.get_mutation_names  r  rw   r]   rn  c                    t                      S r   r,   r  s    rv   rp  z,InplaceCopyFallback.get_unbacked_symbol_defs  rq  rw   c                f   t                                          d |||dt          j        rdnd           t          j                            |d                                                    t          j                            |           | _	        t          j        
                    |            d S )Nz
aten.copy_aoti_torch_copy_zat::_ops::copy_::call)r  r  r   )r  r%  r3   r  rP   r   r@  r8  r  rz   r  )r  r  r  r  r  s       rv   r%  zInplaceCopyFallback.__init__  s     	+&,&;X""AX 	 		
 		
 		
 	
##F1I$6$6$8$8999G++D11		""4(((((rw   Fr  r   c                      fd||fD             }|f}t          t          |                                          ||          }|S )Nc                :    g | ]}                     |          S rx   r  )r   r   r  s     rv   r   z.InplaceCopyFallback.create.<locals>.<listcomp>  s'    ;;;1###A&&;;;rw   )r  r7  r   )r  rN  rM  r  r  r  rW  s   `      rv   r  zInplaceCopyFallback.create	  sX    ;;;;c
;;;%$s~~''((
 

 rw   rw  r*  )r  r   )r  rE  rF  r  r  r}  rw  rp  r%  r  r  r  r  s   @rv   r  r    s         . . .  + + +   ) ) ) ) )(     [    rw   r  c                  2    e Zd ZdZd Zd Zd Zd
dZd Zd	S )MutatingFirstArgExternKernelr  c                    g d | j         D             t          t          | j                  }|                    |                                  dd                    |           d|j                    d S )Nc              3  >   K   | ]}|                                 V  d S r   r"  r  s     rv   rh  z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s.      99a!!##999999rw   r  r.  r  )r  r  r  r  r  r  r  r  )r  r  argrefss      rv   r  z$MutatingFirstArgExternKernel.codegen  s    
99T[999
t)**
 	##%%MM		'(:(:MMW^MM	
 	
 	
 	
 	
rw   c                    dS r[  rx   r  s    rv   r}  z,MutatingFirstArgExternKernel.should_allocate#  r]  rw   c                B    | j         d                                         gS r+  r  r  s    rv   rw  z/MutatingFirstArgExternKernel.get_mutation_names&  r  rw   r]   rn  c                    t                      S r   r,   r  s    rv   rp  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs)  rq  rw   c                    dS r  rx   r  s    rv   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects,  r  rw   Nrw  )	r  rE  rF  r  r  r}  rw  rp  r  rx   rw   rv   r  r    sn         
 
 
  + + +       rw   r  c                       e Zd Z fdZ xZS )ResizeStorageBytesc                r   t          |t                    s
J d            t                                          d t	          |                                          |                     |g          |f           t          j        	                    |
                                           t          j                            |           | _        t          j                            |            d| _        d| _        t          j        j                            |j        
                                           d S )NzTODO: dynamic shapes)r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)rb   rq   r  r%  r7  r   r  rP   r   r@  r8  r  rz   r  r  r  never_reuse_buffersr  r  )r  variabler  r  s      rv   r%  zResizeStorageBytes.__init__1  s   (C((@@*@@@@x**,,--
++#+	 	 	
 	
 	
 	
##H$5$5$7$7888G++D11		""4((("FG	#''(>(>(@(@AAAAArw   rV  r  s   @rv   r  r  0  sA        B B B B B B B B Brw   r  c                  $     e Zd Z fdZd Z xZS )SetSourceTensorKernelc                   |                                  t                                          |                                ||gdt          j        j        j        j                   t          j
        j                            |j                                                   t          j
        j                            |                                           t          j
        j                            |                                            |                                }t!          t#          |          ||           t!          t#          |          ||           g| _        d S )Nz!torch.ops.aten.set_.source_Tensor)r  r  )r  r  r%  r   rg   rN   r  set_source_TensorrP   r   r  r  r  r8  r   r  r7  r  )r  self_tensorstorage_tensorr   r  s       rv   r%  zSetSourceTensorKernel.__init__B  s   !!###""$$.)B	+9	 	 	
 	
 	
 	
#''(8(A(A(C(CDDD	#''(?(?(A(ABBB	#''888**,,:f--{DAA:f--~tDD!
rw   c                ~    | j         d                                         | j         d                                         gS r  r  r  s    rv   ru  z2SetSourceTensorKernel.get_inputs_that_alias_outputS  s1    A''))4;q>+B+B+D+DEErw   )r  rE  rF  r%  ru  r  r  s   @rv   r  r  A  sN        
 
 
 
 
"F F F F F F Frw   r  c                  F     e Zd ZdZd Zd Zd ZddZdd	d
d fdZ xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c           
     v   | j         d         }t          j        j        rddd}||v r||         }| j        rd | j        D             \  }}}n!d | j        D             \  }}| j        d         }|                    ||| j        d         ||g| j        | j	        | j        || 
                                           d S )	Nr  r  r  )r  multiplyc              3  >   K   | ]}|                                 V  d S r   r"  r  s     rv   rh  z*ScatterFallback.codegen.<locals>.<genexpr>g  s.      JJq2244JJJJJJrw   c              3  >   K   | ]}|                                 V  d S r   r"  r  s     rv   rh  z*ScatterFallback.codegen.<locals>.<genexpr>i  s.      EEA!--//EEEEEErw   r2   r   )r   rP   r   r  src_is_tensorr  r  generate_scatter_fallbackr  r  r-  )r  r  r  get_operator_enumr   r   rM  s          rv   r  zScatterFallback.codegen^  s    X&7 	3(-6 B B****62 	(JJdkJJJOQssEEEEEJQ$Q'C))"1%uc2 #!!	
 	
 	
 	
 	
rw   c                    dS r[  rx   r  s    rv   r}  zScatterFallback.should_allocateu  r]  rw   c                B    | j         d                                         gS r+  r  r  s    rv   rw  z"ScatterFallback.get_mutation_namesx  r  rw   r]   rn  c                    t                      S r   r,   r  s    rv   rp  z(ScatterFallback.get_unbacked_symbol_defs{  rq  rw   NTr  include_selfr2  rq   r  r   r  r   c          
     `    t          |t                     _         j        r fd|||fD             }|f}	n fd||fD             }||f}	t                                          d t          |                                                               |          |	||dt          |          ddg|           t          j
                            |                                           t          j
                                        _        t          j
                                        d S )Nc                :    g | ]}                     |          S rx   r  r   r   r  s     rv   r   z,ScatterFallback.__init__.<locals>.<listcomp>  s'    FFFt))!,,FFFrw   c                :    g | ]}                     |          S rx   r  r  s     rv   r   z,ScatterFallback.__init__.<locals>.<listcomp>  s'    AAAt))!,,AAArw   r  r  r  )r  r  r  )rb   rX   r  r  r%  r7  r   r  r{   rP   r   r@  r8  r  rz   r  )r  r  r   r2  r   rM  r  r  tensorsr  r  s   `         rv   r%  zScatterFallback.__init__~  s2    (Y77  	'FFFFq%oFFFG FMMAAAAq%jAAAG #JMq||~~&&((|<<";//+3^*D# 	 		
 		
 		
 	
##AJJLL111G++D11		""4(((((rw   rw  )r2  rq   r  r   r  r   r  r  s   @rv   r  r  W  s         
 
 
.  + + +    !%!!) !) !) !) !) !) !) !) !) !) !) !)rw   r  c                  <     e Zd ZdZd Zd Zd Zd	dZ fdZ xZ	S )
IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                   d | j         D             ^}}}g }t          |          }t          | j                  D ]^\  }}| j        |         #|                    t          |                     5|                    t          j        j        j	                   _ |j
        |                                 |||g|                                 R   d S )Nc              3  >   K   | ]}|                                 V  d S r   r"  r  s     rv   rh  z+IndexPutFallback.codegen.<locals>.<genexpr>  s.      &R&Rq':':'<'<&R&R&R&R&R&Rrw   )r  r  r   r  r  r  rP   r   r  r  generate_index_put_fallbackr  r  )	r  r  r   rf   valid_indicesr  iter_valid_indicesr   r   s	            rv   r  zIndexPutFallback.codegen  s    &R&Rdk&R&R&R#F]!-00dl++ 	> 	>DAq|A*t$6778888qw3<====++  ""Aw	
9=9P9P9R9R	
 	
 	
 	
 	
 	
rw   c                    dS r[  rx   r  s    rv   r}  z IndexPutFallback.should_allocate  r]  rw   c                B    | j         d                                         gS r+  r  r  s    rv   rw  z#IndexPutFallback.get_mutation_names  r  rw   r]   rn  c                    t                      S r   r,   r  s    rv   rp  z)IndexPutFallback.get_unbacked_symbol_defs  rq  rw   c           	     (    | _         d |D             } fd||g|D             }t          j        rdnd}t                                          d t          |                                                               |          |fd||           t          j	        
                     j        d                                                    t          j	                                        _        t          j	                                        d S )Nc                    g | ]}||S r   rx   r  s     rv   r   z-IndexPutFallback.__init__.<locals>.<listcomp>  s    ===qq}}}}rw   c                :    g | ]}                     |          S rx   r  )r   r   r  s     rv   r   z-IndexPutFallback.__init__.<locals>.<listcomp>  s'    NNNQ4%%a((NNNrw   aoti_torch_index_put_outzat::index_put_outzaten.index_put_)r  r  r  r   )r  r3   r  r  r%  r7  r   r  rP   r   r@  r  r8  r  rz   r  )
r  r  r   r  rf   
accumulater  r  r  r  s
   `        rv   r%  zIndexPutFallback.__init__  s   ==G===NNNN1f2M}2MNNN*0*?X&&EX 	 	q||~~&&((M0+# 	 	
 	
 	
 	
##DKN$;$;$=$=>>>G++D11		""4(((((rw   rw  r  r  s   @rv   r  r    s         
 
 
  + + +   ) ) ) ) ) ) ) ) )rw   r  c                  *    e Zd Zed             Zd ZdS )
DeviceCopyc                :   |                                 sQt          d |                                D                       r&t          j        j        s|                    |          S t          j        	                    |           t          j        	                    |
                                           t          d           t          t          ||                                |                                          |                     |          g          S )Nc              3  <   K   | ]}|t           j        j        v V  d S r   )rP   r   r  r  s     rv   rh  z$DeviceCopy.create.<locals>.<genexpr>  s,      GGqA**GGGGGGrw   zDeviceCopy in input programr  )r\  r  r  r3   aot_inductoruse_runtime_constant_foldingr  rP   r   add_device_infor   rE   r  r   r   r   rB  )r  r   r   s      rv   r  zDeviceCopy.create  s     	0GGA4D4D4F4FGGGGG	0 'D	0
 ''///	'''	///7888kkmmZZ\\  
 q!!"
 
 	
rw   c                ,   |                                  }t          |          dk    sJ | j        r5|                    |d         | j                                                   d S |                    |d         |                                            d S r  )r%  r   r  r  r6  r_  s      rv   r  zDeviceCopy.codegen  s      ""4yyA~~~~ 	K''Q1A1S1S1U1UVVVVV''Q1G1G1I1IJJJJJrw   N)r  rE  rF  r  r  r  rx   rw   rv   r  r    sA        
 
 [
*K K K K Krw   r  c                  <     e Zd ZdZd Zd Z fdZd	dZd Z xZ	S )
rk   z;
    The result of a call to aten._local_scalar_dense.
    c                    dS r  rx   r  s    rv   r9  zDynamicScalar.get_reads  r  rw   c                    dS r[  rx   r  s    rv   r}  zDynamicScalar.should_allocate  r]  rw   c                    |                                  t                                          d t          t	          j        d                    |                     |g                     || _        || _        d S r   )	r2  r  r%  r7  rg   r   r  symkeypath)r  r  r  r  r  s       rv   r%  zDynamicScalar.__init__  sb    z%,u*=*=>>@S@SUYTZ@[@[\\\rw   r]   rn  c                ,    t          | j        g          S r   )r-   r  r  s    rv   rp  z&DynamicScalar.get_unbacked_symbol_defs  s    48*%%%rw   c                0    |                     |            d S r   )codegen_dynamic_scalarr  s     rv   r  zDynamicScalar.codegen  s    &&t,,,,,rw   rw  )
r  rE  rF  r  r9  r}  r%  rp  r  r  r  s   @rv   rk   rk     s                 & & & &- - - - - - -rw   rk   c                  @     e Zd ZdZd Zd Z fdZd Zd Zd Z	 xZ
S )rl   z5
    The result of a call to aten._assert_scalar
    c                    dS r  rx   r  s    rv   r9  zAssertScalar.get_reads  r  rw   c                    dS r[  rx   r  s    rv   r}  zAssertScalar.should_allocate  r]  rw   c                    t                                          d t          t          j        d                    g            || _        || _        d S r   )r  r%  r7  rg   r   scalarr  )r  r  r  r  s      rv   r%  zAssertScalar.__init__  sN    u|E**++	
 	
 	
 rw   c                    dS r  rx   r  s    rv   r  zAssertScalar.has_side_effects!  r  rw   c                *    t          | j                  S r   )r(   r  r  s    rv   rB  z%AssertScalar.get_unbacked_symbol_uses$  s    $T[111rw   c                Z   t           j        j        rd S |                    dt           j        j                            | j        d           d           |                    dt          | j                   d           |                    | 	                                 d           d S )Nzif not F)r  :z    raise RuntimeError(r  z = None)
rP   r   r  r  r  codegen_python_sizevarr  r  r  r8  r  s     rv   r  zAssertScalar.codegen'  s    7 	;D e!'.EEdk\aEbbeee   ITXIIIJJJ 999:::::rw   )r  rE  rF  r  r9  r}  r%  r  rB  r  r  r  s   @rv   rl   rl     s             	 	 	 	 	  2 2 2; ; ; ; ; ; ;rw   rl   c                  $    e Zd ZU ded<   ded<   dS )ExternKernelNoder{   rz   zexport_schema.Nodert   Nr  rE  rF  rG  rx   rw   rv   r   r   ;  s'         IIIrw   r   c                       e Zd Z	 ddd fdZd ZddZd Zed	             Zd
 Z	d Z
d Zd Zd Zedd            Zed             Z fdZ xZS )FallbackKernelNr  c                   |t           j        j        k    r7t          |          dk    r$t          |          dk    rt           j        j        }t                                          |t          |          t          |          |           g  _        d _	        | _
        t          |t          j        j        t          j        j        f          sJ d| dt!          |           d            | _        | _        |i n| _        t(          j                             j                   g  _        g  _        t           j        t          j        j                  rd S d j                                        v rd S  j        j        }t          j        j                             j                  r4 j                            |d                                                     d S |j!        r!tE          |          stG          d	|           |j$        }	                      j%         j&                  \  }
} fd
}t          j        j        '                    ||
|          D ]\  }} |||           d S )Nr2   r  Fz#Fails to create FallbackKernel for r/  z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                    t           j        t          j                  rt          |t          t
          f          sJ t           j        t          j                  o0t           j                                        t          j                  }t           j        t          j                  o0t           j                                        t          j                  }|st           j        t          j                  rt          |t
          t          f          rJ |d S  j	        d S  fd}|r|D ]} ||           d S t           j        t          j                  s|sJ  ||           d S )Nc                   j                             |                                            j        j        rJj                            t          t          |                                           |                      d S d S r   )	alias_namesr  r8  
alias_infois_writer  r  r7  r   )r   infor  s    rv   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  s}     ''

555?+ )00&z!,,..'A'A1dKK     rw   )
rb   rs   rg   ListTyperc   rd   OptionalTypegetElementType
TensorTyper
  )r  rs  is_optional_tensoris_list_tensorr  
tensor_argr  s   `     rv   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s   $)U^44 6!#e}55555!+	5-" " "KTY55779IJJ  (	5>BB z	((**E,<H HN " :Z	5;K%L%L : &cE4=99999{&       "% * *JIj))))* * "$)U-=>>TBTTTT	#rw   )(r  r  r  r   Scalarr  r%  rd   r  use_runtime_dispatchr  rb   rg   r  r  r  rs   r  r  r   rP   r   warn_fallbackr  r	  rf  rz   r  _libraryr3  mutates_and_returns_first_argr  r8  
is_mutabler   r  r  r  r  
zip_schema)r  r  r  r  nontensor_argsr  r   r  schemaschema_argsr~   r  r  rs  r  s   `             rv   r%  zFallbackKernel.__init__V  s    dho%%K  A%%N##q((
 X_F+.!!	 	 	
 	
 	
 ')$)!!2
%
.
 
 	X 	X XWW4<<WWW	X 	X 	X ","Nbb	d5666 '))+d&
(FGG 	 F!1!6!6!8!888
 F!) >==d>NOO 	&&{1~'>'>'@'@AAAF 	%;F%C%C 	%B&BB   &**4;8JKKf 	  	  	  	  	D -88vNN 	4 	4ID#((s3333	4 	4rw   c                N    t           d          sd S t          t          j        j        j         j                  }|sd S |                                D ]M\  }fd fd}|                    |	                    |           d |             |j
                    Nd S )Nr  c                2   |dk    r| S t          |          dk    rnt          |d         t                    rSt          |d         t          j                  r3 |  d|d         j         d|d         j         d|dd                    S t          |d         t                    r% |  d|d         j         d|dd                    S t          |d         t          j                  r\t          j        j	        r& d	|d         j         d
|  d|dd                    n$ |  d|d         j         d|dd                    S t          |d         t                    r% |  d|d         j         d|dd                    S t          d|           )Nrx   r   r   r2   r  r  r  z()z	std::get<z>(r#  r$  z.__floordiv__(zunrecognized keypath )r   rb   r%   r  SequenceKeyrz   r   rP   r   r  r'   r^  r'  )r  r  gos     rv   r#  z7FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go  s   b==K LLA%%"71:}== &"71:v/ABB & 2EE'!*/EEGAJNEEEwqrr{    
M:: L2;;
;;;WQRR[III
F,>?? L 7.J@wqz~@@@@@'!""+NNNR4 ; ;'!*. ; ; ;WQRR[II
  
K88 L 2JJWQZ5GJJJGTUTVTVKXXX()J)J)JKKKrw   c                    t           j        j        rt          j        rt          j                  dk    r)  j        d                                                   S t          d         t          j
                  sJ   j        d         j                                                 dd                    S                                             S r  )rP   r   r  r3   r  r   r  r8  rb   r  r"  r   )r#  r  r  s   rv   go_outerz=FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go_outer  s    7& 86+@ 8
 4<((A--!r$,q/":":"<"<gFFF)'!*f6HIIIII!r$,wqz~">"G"G"I"I7STSUSU;WWW2dmmoow777rw   z = )rT  r*   rP   r   r   r   r  r  r  codegen_unbacked_symbol_declr  )r  r  r  r   r%  r#  r  s   `    @@rv   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defs  s   t011 	F5G&(>
 
 ! 	F+1133 -	 -	JAwL L L L L68 8 8 8 8 8 8 77::[[xxzz[7>[[   W-	 -	rw   r]   rn  c                    t          | dd           x}r6t          t          j        j        j        |                                          S t                      S )Nr  )r   r*   rP   r   r   r   r  r-   )r  r  s     rv   rp  z'FallbackKernel.get_unbacked_symbol_defs  sR     '.A4 H HH 	 , *,= dff <<rw   c                   t           j         G d d                      fd| j        D             }|                     || j                  \  }}t
          j        j        rdt          | j	        t          j        j                  r@|                     ||          }d t          | j	        j        j        |          D             }nd |D             }| j                            |           |S )Nc                  "    e Zd ZU ded<   ddZdS ))FallbackKernel.codegen_args.<locals>.Shimr   refr]   r{   c                    | j         S r   )r,  r  s    rv   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s	    xrw   Nru  )r  rE  rF  rG  r  rx   rw   rv   Shimr+    s3         HHH           rw   r.  c                J    g | ]} |                                            S rx   r"  )r   r   r.  s     rv   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>   s/    HHHqttA//1122HHHrw   c                h    g | ]/\  }}t           j        j                            ||j                  0S rx   )rP   r   r  r  r  )r   paramr   s      rv   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>$  sC       E1 $33AuGG  rw   c                V    g | ]&}t           j        j                            |          'S rx   r*  r  s     rv   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>)  s+    IIIqAG(77::IIIrw   )rH  	dataclassr  r  r  rP   r   r  rb   r  rg   r  r  r  r   r  r  r   r]  )r  r  r~   r   r.  s       @rv   r%  zFallbackKernel.codegen_args  s   			  	  	  	  	  	  	  
		  IHHHDKHHH**;8JKKf7 	J:d.>
@U#V#V 	J..tV<<D  #D$4$<$F M M  DD
 JIDIIID 	6"""rw   c                v   | rd | D             }|d         S t          |t          j                  r|j        S t          |t          t
          f          ret          d |D                       }d |D             }t          |          dk    r|d         S |D ]}t          |j	                  r|c S |d         S d S )Nc                ^    g | ]*}|                                 |                                 +S rx   )r   rr  s     rv   r   z.FallbackKernel.find_device.<locals>.<listcomp>2  s1    SSSC#..BRBRSs~~''SSSrw   r   c              3  L   K   | ]}t                               d |          V   d S r   )r  find_devicer  s     rv   rh  z-FallbackKernel.find_device.<locals>.<genexpr>7  sC       $ $89**433$ $ $ $ $ $rw   c                    g | ]}||S rx   rx   )r   r   s     rv   r   z.FallbackKernel.find_device.<locals>.<listcomp>;  s    AAA&&AvAAArw   r2   )
rb   rg   r  r   rc   rd   r-   r   rH   rs   )r  r  devices
device_setr   s        rv   r7  zFallbackKernel.find_device/  s     	SS;SSSG1:nel33 	)!((ntUm44 	# $ $=K$ $ $  J BAJAAAG7||q  qz!! " "&+&& "!MMM"1:trw   c                    t          | j        t          j        j                  rdS t          | j                                                  S r[  )rb   r  rg   r  r  r$   r  r  s    rv   r  zFallbackKernel.has_side_effectsD  s>    d&
(FGG 	5t/00;;===rw   c                    | j         S r   )r	  r  s    rv   ru  z+FallbackKernel.get_inputs_that_alias_outputI  r  rw   c                D    t          | j                  dk    sJ | j        S r  )r   rf  r  s    rv   rw  z!FallbackKernel.get_mutation_namesL  s'    4&''1,,,,""rw   c           	       
 t          | t                    sJ |                     | j        | j                  \  }|                     |          }fd| j        D             }t          j        j	        sg ||S t          d d           }|                    | j        |          }d 
| j        }|j        j        }t          |          dk    r |d         j        } 
|| j                  g}ndt          | j        t$                    sJ t          |          t          | j                  k    sJ 
fdt'          || j                  D             }t)          |                                 t-          j        | j                                        ||i                     }	t          j        j                            |	           g ||S )Nc                <    g | ]}                     |d           S r   r  )r   rc  r   s     rv   r   z<FallbackKernel.export_extern_kernel_node.<locals>.<listcomp>Z  s4     
 
 
&)FJJsD!!
 
 
rw   c                T   t          | t          j                  r|}t          |t          t          f          rt          |          dk    sJ |d         }t          j                            t          j	        |
                                                    S t          | t          j                  rVt          |                                 t          j                  r*t          j                            d |D                       S t          dt          |                      )Nr2   r   rf  )	as_tensorc                \    g | ])}t          j        |                                           *S )rf  )export_schemaTensorArgumentr8  )r   r   s     rv   r   zZFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output.<locals>.<listcomp>t  s>           &4#,,..III     rw   )
as_tensorszUnsupported return type )rb   rg   r  rc   rd   r   rC  Argumentr  rD  r8  r  r  RuntimeErrorrs   )return_typeoutputr   s      rv   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_outpute  s(   +u'788 SftUm44 $v;;!++++ )C$-44+:OOO 5    K88 SZ**,,e.>> > S %-44   #)      5    ##Qd;>O>O#Q#QRRRrw   r2   r   c                8    g | ]\  }} |j         |          S rx   )r  )r   return_schemarI  rJ  s      rv   r   z<FallbackKernel.export_extern_kernel_node.<locals>.<listcomp>  s=          )M6 %$]%<fEE     rw   )r  r  r  metadata)rz   rt   )rb   r  r  r  r  r  r  rP   r   aot_moder   serialize_inputsr  r  returnsr   r  r  rd   r   r   r8  rC  Noderz   extern_kernel_nodesr  )r  r~   ordered_kwargs
serializernamed_argumentsr  rP  rH  output_argumentsrt   rJ  r   s             @@rv   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_nodeV  s
   $/////**4;8JKKf**488
 
 
 
-1-O
 
 
 w 	,+T+N++*466
$55d6FfUU	S 	S 	S. !.(w<<1!!*.K 4 4[$, O OP dlE22222w<<3t|#4#44444       -0$,-G-G     
  #',,..&(	  
 
 
 	
#**4000''''rw   c                   | j         }|j        dk    rxt          |t          j        j                  sJ t          j        j        rEddl	m
} t          j        r3t          |          |vr"t                              d|           d| _        ni|j        dk    rFt          |t          j        j                  sJ t          j        j        rt          j        sd| _        nt          j        j        rd| _        | j        r|                     |           d }d }t          j        r|                                 }n*g |                                 |                                 }|                    |                                 | j        | j        || j        | j        | j        | j         || j        
  
         n|                     |           g |                                 |                                 }t          j        j                            | |           t          | j        t@                    r| !                    |           | "                    |           d S )Nr  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantized)#r  r  rb   rg   r  r  rP   r   r  torchgen.aoti.fallback_opsrY  r3   r  r{   r  r  r  r  rW  r%  r-  6generate_extern_kernel_alloc_and_find_schema_if_neededr8  r  r  r  r  r  r  r  generate_fallback_kernelr  ra  r1  r'  )r  r  r  rY  exported_argsr~   s         rv   r  zFallbackKernel.codegen  sX   !v%%fej&;<<<<<w" 
5LLLLLL( 5S[[@U-U-U KKa   15D---fej&;<<<<<w" 5, 504D- w" 1,0)$ 	3  ))) MD$ F $ > > @ @E**,,Et/B/B/D/DEJJ'$"#-       )))AT&&((A4+>+>+@+@ADG 99$EEE$+v.. 3))'222))'22222rw   rI  r   c           	         t          | j        | j        t          |                                           t          |                                                     S r   )r  r   r   rC   r   r   )rI  s    rv   tensor_to_layoutzFallbackKernel.tensor_to_layout  sD    ML%fkkmm44%fmmoo66	
 
 	
rw   c                    t           j        f}||vrt          j        j        nt                      }|5    j        |g|R i |\  }}}}	}
d d d            n# 1 swxY w Y                        ||          }|  t          |          ||||	|
          n*|s
J d              t          |          ||||	|
           fd |g           }t          |t          t          t          f          r|_        n|g_        |S )Nr  z"Not sure where to find device infoc                b    t           t          t          f          r@ t                      fdt	          t                               D                       S t           t                    r" fd                                 D             S t           t          j	                  r$t                                                   S t           t                    r S t           t          j                  r j        j        S  J dt                      d            d S )Nc              3  b   K   | ])} |         t                    |fgz             V  *d S r   rs   )r   r   generate_outputr  rI  s     rv   rh  zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>  sZ       $ $ $OF1Iw4<<:K9L/LMM$ $ $ $ $ $rw   c           
     V    i | ]%\  }}| |t                    |fgz             &S rx   rd  )r   rc  r  re  r  rI  s      rv   r   zBFallbackKernel.create.<locals>.generate_output.<locals>.<dictcomp>  sN        S g$v,,9L8M.MNN  rw   zFallbackKernel output type z is not supported)rb   rc   rd   rs   r   r   re   r  rg   r  MultiOutputr`  rq   SymIntrt   r  )rI  r  r  re  packeds   ``rv   re  z.FallbackKernel.create.<locals>.generate_output   sf   &4-00 #tF|| $ $ $ $ $ $"3v;;//$ $ $    FD))      $*LLNN    FEL11 "((00  
 FC(( FEL11 {'' NNPfPPP #NNtrw   )r  *_fused_moving_avg_obs_fq_helper_functionalrP   r   r  r   r  r7  r7  re  rb   rc   rd   re   r  )r  r  r~   r   fake_incorrect_kernelscontextr  r  r  r  r  r   r  re  ri  s   `            @@rv   r  zFallbackKernel.create  s   "&"Q!S!'/E!E!EAG;== 	  	< 	< #"6;D;;;F;;!	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< n==!S6"""3  FF ??????S!&))"3  F	 	 	 	 	 	 	6 "/."55geT233 	'$FNN%YFNs   AA!Ac                D    t                                                      S r   )r  r  r  s    rv   r  zFallbackKernel.apply_constraint"  s    ww'')))rw   r   rw  )rI  r   )r  rE  rF  r%  r'  rp  r%  rH  r7  r  ru  rw  rW  r  r`  r  r  r  r  r  s   @rv   r  r  U  sP        ~4 ~4 ~4 ~4 ~4 ~4 ~4 ~4@8 8 8t         .   \(> > >
     # # #@( @( @(D83 83 83t 
 
 
 \
 D D [DL* * * * * * * * *rw   r  c                  4     e Zd ZdZd Zd Zdd fd
Z xZS )ComplexViewz9View a complex number as two dtyped numbers or vice versac                    dS r[  rx   r  s    rv   r}  zComplexView.should_allocate*  r]  rw   c                B    | j         d                                         gS r+  r  r  s    rv   ru  z(ComplexView.get_inputs_that_alias_output-  s    A''))**rw   Nr  c               V    t                                          ||||||           d S )Nr  )r  r%  )r  r  r  r  r  r  r  r  s          rv   r%  zComplexView.__init__1  sB     	/ 	 	
 	
 	
 	
 	
rw   )r  rE  rF  r  r}  ru  r%  r  r  s   @rv   ro  ro  &  sk        CC  + + + 
 
 
 
 
 
 
 
 
 
 
rw   ro  c                      e Zd ZU ded<   dS )re  rz  r   Nr  rx   rw   rv   re  re  E  s         rw   re  c                  @     e Zd Zd Zd Zd fdZddZd	 Zd
 Z xZ	S )rg  c                8   t          |          dk    r|d         \  }}t          |t                    r$|                     | d| d|dd                    S t          |t                    rct
          j        j                            || 	                                t          |                    }|                     ||dd                    S t          |t                    r$|                     | d| d|dd                    S t          d|          |S )Nr   r#  r$  r2   z['z']znon supported index type: )r   
issubclassrc   codegen_list_tuple_accessrd   rP   r   r  codegen_tuple_accessr8  r{   re   r'  )r  basenamer  ityper   tuple_accesss         rv   rw  z%MultiOutput.codegen_list_tuple_accessN  s"   w<<!qzHE1%&& J556H6HA6H6H6H'RSRTRT+VVVE5)) 	J w3HHdmmoos1vv    55lGABBKPPPE4(( J556J6JQ6J6J6JGTUTVTVKXXX$%A5IIIOrw   c                    |                     |                                 |                     | j        d                                         | j                             d S r+  )codegen_multi_outputr8  rw  r  r  r  s     rv   r  zMultiOutput.codegen`  sV    $$MMOO**4;q>+B+B+D+DdlSS	
 	
 	
 	
 	
rw   r  List[Tuple[Any, ...]]c                    t                                          d ||gd           t          j                            |           | _        t          j                            |            || _        d S r  )r  r%  rP   r   r  rz   r  r  )r  r  r_  r  r  s       rv   r%  zMultiOutput.__init__f  s[    vw333G++D11		""4(((rw   r]   rn  c                @    | j         d                                         S r+  )r  rB  r  s    rv   rB  z$MultiOutput.get_unbacked_symbol_usesl  s    {1~66888rw   c                    dS r[  rx   r  s    rv   r}  zMultiOutput.should_allocateo  r]  rw   c                $    d | j         D             S )Nc                    g | ]P}t          |t                    r9t          |                                          d k    <|                                QS r   )rb   r  r   ru  r8  r   inps     rv   r   z<MultiOutput.get_inputs_that_alias_output.<locals>.<listcomp>s  s_     
 
 
#~..
 C446677!;; LLNN <;;rw   )r  r  s    rv   ru  z(MultiOutput.get_inputs_that_alias_outputr  s%    
 
{
 
 
 	
rw   )r  r~  rw  )
r  rE  rF  rw  r  r%  rB  r}  ru  r  r  s   @rv   rg  rg  J  s          $
 
 
     9 9 9 9  
 
 
 
 
 
 
rw   rg  c                      e Zd ZU dZded<   d Zd ZddZdd
Zd Z	ddZ
ed             Zd Zd Zed             ZddZeZdS )rF  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    rY   r  c                    t          | j        |          }t          |          r|S t          t	          | j                  j         d| d          )Nr  z not callable)r   r  callableAttributeErrorrs   r  )r  rz   r   s      rv   __getattr__zMutableBox.__getattr__  sP    TY%%B<< 	ITY 8NN4NNNOOOrw   c                4    | j                                         S r   r  r  s    rv   r2  zMutableBox.realize  r  rw   r]   rn  c                4    | j                                         S r   r  r  s    rv   rB  z#MutableBox.get_unbacked_symbol_uses  r  rw   r  c                4    | j                                         S r   r  r  s    rv   r  zMutableBox.get_read_names  r  rw   c                4    | j                                         S r   )r  r  r  s    rv   r  zMutableBox.get_defining_op  s    y((***rw   Nc                6    | j                             |          S r   )r  r6  r4  s     rv   r6  zMutableBox.codegen_reference  s    y**6222rw   c                4    | j                                         S r   r  r  s    rv   r  zMutableBox.layout  s    y##%%%rw   c                    | j         S r   rz  r  s    rv   r   zMutableBox.get_layout  r  rw   c                4    | j                                         S r   r  r  s    rv   r   zMutableBox.get_size  r  rw   c                    | j         j        S r   r  r  s    rv   r   zMutableBox.dtype  r  rw   r{   c                ^   t          | j        t                    r@t          |           j         dt          | j                  j         d}d}| j        j        }n t          |           j         d}| j        }d}|t          t          |                    |g}d                    |          S )Nr  z))r  
)rb   r  rF  rs   r  r  r{   r  )r  line0endlr  r  s        rv   r  zMutableBox.__str__  s    di,, 	Dzz*HHT$)__-EHHHEDINEEDzz*---EIED 3u::

 yyrw   rw  rC  r   ru  )r  rE  rF  r  rG  r  r2  rB  r  r  r6  rK  r  r   r   r   r  r  rx   rw   rv   rF  rF  {  s           LLLP P P# # #4 4 4 4* * * *+ + +3 3 3 3 & & X&  $ $ $   X       " HHHrw   rF  c                  $    e Zd Zed             ZdS )rX   c                :    t          t          |                     S r   )rX   r  )r  s    rv   r  zTensorBox.create  s    D))***rw   N)r  rE  rF  rH  r  rx   rw   rv   rX   rX     s-        + + \+ + +rw   c                  >    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	S )
r  c                    t          | j        t          t          f          r*| j                                        t
          j        j        v S dS r[  )rb   r  r  r  r8  rP   r   graph_inputsr  s    rv   ru  zStorageBox.is_input_buffer  s?    di+!?@@ 	@9%%''17+???urw   c                    t          | j        t                    o)| j                                        t          j        j        v S r   )rb   r  r  r8  rP   r   r  r  s    rv   r  zStorageBox.is_module_buffer  s6    ty>33 :	""$$(99	
rw   c           	        t          | j        t          t          t          t
          t          f          r| j                                        S t          | j        t          t          t          t          f          sJ t          | j                              | j                                        }| j                                        }t          d t          | j                                        | j                                        | j                                                  | j                  | _        t&          j                            | j                  | j        _        t&          j                            | j                   | j        | j        _        || j        _        || j        _        | j        j        S )Nr  r  )rb   r  r  r  r  r  r  r8  r  r9  rC  r  rs   rU  r
  r   r   r   r   rP   r   r  rz   r  r   rS  r   )r  rS  r   s      rv   r2  zStorageBox.realize  ss   I	
 	
 
	( 9%%'''$)iD$%GHH 	
 	
$IK
 K
 	
 	
 	
 i//11I++--	"!y++--i))++Y''))  
 
 
 
	 00;;		""49--- L	 +	'	y~rw   c                    t          | j        t          t          f          r8| j                                        j        dk    r|                                  dS dS dS )zL
        Called on buffers we expect to be forced to realize later.
        r2   N)rb   r  r  r9  r  nontrivial_read_countr2  r  s    rv   rA  zStorageBox.realize_hint  sZ    
 ty9i"899		**,,BQFFLLNNNNN		 	FFrw   c                    t          | j        t                    o5|                                 t          j        k    p|                                 S r   )rb   r  r  r:  r3   realize_acc_reads_thresholdr  r  s    rv   r=  z!StorageBox.has_exceeded_max_reads  sD    $)Y// 
NNvAA )&&((	
rw   c                ^   |dk    rt          | j        t          t          f          rt	          | j                  r:| j                                        ddg}t          fd|D                       rdS |                                 t          j	        k    p| 
                                S dS )zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r2   expsigmoidc              3  *   K   | ]}|j         v V  d S r   )used_ops)r   r   opcounts     rv   rh  z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>  s+      @@qG,,@@@@@@rw   TF)rb   r  r  r9  r   r  r  r:  r3   realize_reads_thresholdr  )r  r  	heavy_opsr  s      @rv   should_realize_on_reusez"StorageBox.should_realize_on_reuse  s    
 199DI	9/EFF9di    )4466"I.	@@@@i@@@@@  4  6#AA -**,, urw   c                \    |                      |          r|                                  d S d S r   )r  r2  r  s     rv   r@  zStorageBox.mark_reuse  s3    ''.. 	LLNNNNN	 	rw   c                4    | j                                         S r   r  r  s    rv   r:  zStorageBox.num_reads  r  rw   N)r  rE  rF  ru  r  r2  rA  r=  r  r@  r:  rx   rw   rv   r  r    s          

 
 
  B  
 
 
  $  % % % % %rw   r  c                  2    e Zd ZU ded<   ded<   dZded<   dS )Subgraphr{   rz   ztorch.fx.GraphModulegraph_moduleNzOptional[GraphLowering]r   )r  rE  rF  rG  r   rx   rw   rv   r  r    s8         III&&&&%)E))))))rw   r  buffersSequence[IRNode]c                    d | D             } t          t          d | D                                 t          |           k     S )Nc                d    g | ]-}t          |t                    r|                                n|.S rx   )rb   r  r  r   r  s     rv   r   z(_has_aliased_buffers.<locals>.<listcomp>  sH        !+6? C CO  rw   c              3  4   K   | ]}t          |          V  d S r   )idr  s     rv   rh  z'_has_aliased_buffers.<locals>.<genexpr>$  s(      ;;"V**;;;;;;rw   )r   r-   )r  s    rv   _has_aliased_buffersr    sR       G
 z;;7;;;;;<<s7||KKrw   c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   d fdZ	e
dd            Zd Z xZS )ConditionalNr   	predicateOptional[List[TensorBox]]operandsOptional[Subgraph]true_subgraphfalse_subgraphOptional[List[MultiOutput]]r  rY   List[TensorBox]r  r  re  c                   || _         || _        || _        || _        g }t	          |t
                    s|                    |           |                    |           t                      	                    d ||           t          j                            |           | _        t          j                            |            d S Nrt  )r  r  r  r  rb   r  r  r  r  r%  rP   r   r  rz   r  )r  r  r  r  r  r  r  r  s          rv   r%  zConditional.__init__/  s     # *,)%:;; 	%MM)$$$h 	 	
 	
 	
 G++D11		""4(((((rw   rX   true_fnfalse_fnc           	     \                          |          } fd|D             }t          j        j        j        d         }d |D             }||fD ]z}|j        qt          j                            |j        ||j                  |_        t          j        |j                  5   |j        j	        |  d d d            n# 1 swxY w Y   {|j        j
        }|j        j
        }	d|fd|	ffD ])\  }
}t          |          rt          d|
 d|           *t          |          t          |	          k    sJ ||	f            t          t          ||	                    D ]&\  }\  }}|                                |                                k    sJ |||f            |                                |                                k    sJ |||f            |                                |                                k    sJ |||f            |                                |                                k    sJ |||f            |                                j        |                                j        k    sJ |||f            (t-          |t.                    s|                                }n7t          |          d	k    s
J d
            |d	                                         }t1          ||||t3          |                    fdt          |          D             }|_        |S )Nc                :    g | ]}                     |          S rx   r  r  s     rv   r   z&Conditional.create.<locals>.<listcomp>S  s'    ;;;QC%%a((;;;rw   r  c                (    g | ]}|j         d          S r  r  r  s     rv   r   z&Conditional.create.<locals>.<listcomp>V  s    <<<1<<<rw   gmexample_inputssubgraph_namer  r  zVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: r   zQWhen predicate is not a Tensor, there must be at least one operand in torch.cond.)r  r  r  r  r  c                *   g | ]\  }}t          t          |                                |                                |                                |                                |                                j                   t          |fg          S r  	rg  r  r   r   r   r;  r   r  rc   )r   r   rI  conditionals      rv   r   z&Conditional.create.<locals>.<listcomp>  s     
 
 
 6 !,,.. **,,**!,,..!,,..5   
 

 
 
rw   )rB  rP   r   r  r~   make_subgraphr  rz   set_graph_handlerrungraph_outputsr  r'  r   r   r   r   r;  r   r   r   r  rb   r  r  re  r  )r  r  r  r  r  fx_operandsfake_operandssubgraphtrue_outputsfalse_outputsrz   r  r   tofor   r  s   `               @rv   r  zConditional.createJ  s    %%i00	;;;;(;;;g*/3<<<<< (+ 		7 		7H~%!"!6!6,#0"*- "7 " "
 (88 7 7&HN&667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 }2 4(,7*m9TU 	 	MD'#L11 $_*._ _U\_ _   <  C$6$6666}8U666$S}%E%EFF 	Q 	QKAxB;;==BKKMM111Ar2;111==??bmmoo5552r{555==??bmmoo5552r{555<<>>R\\^^333aR[333==??)R]]__-CCCCaR[CCCC)%:;; 	.))++FF H!!!b "!!a[++--F!!#$V,,
 
 

 
 
 
 '|44
 
 
" &s   %CC	C	c                0    |                     |            d S r   )codegen_conditionalr  s     rv   r  zConditional.codegen  s    ##D)))))rw   )
r  rY   r  r  r  r  r  r  r  re  )r  rX   r  r  r  r  r  r  )r  rE  rF  r  rG  r  r  r  r  r%  r  r  r  r  r  s   @rv   r  r  '  s         "&I&&&&*.H....(,M,,,,)-N----+/G////) ) ) ) ) )6 O O O [Ob* * * * * * *rw   r  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZded	<   d fdZ	e
dd            Zd Z xZS )	WhileLoopNr  carried_inputsadditional_inputsr  cond_subgraphbody_subgraphr  r  r  r  r  re  c                   || _         || _        || _        || _        t	                                          d |||z              t          j                            |           | _	        t          j        
                    |            d S r  )r  r  r  r  r  r%  rP   r   r  rz   r  )r  r  r  r  r  r  r  s         rv   r%  zWhileLoop.__init__  s     -!2**!$55 	 	
 	
 	
 G++D11		""4(((((rw   cond_fnbody_fnc           	          fd|D             } fd|D             }||z   }t           j        j        j        d         t           j        j        j        d         z   }d |D             }||fD ]z}|j        qt           j                            |j        ||j                  |_        t          j        |j                  5   |j        j        |  d d d            n# 1 swxY w Y   {|j        j	        }	|j        j	        }
t          |
          rt          d|
           t          |	          dk    s
J |	            |	d	                                         t          j        k    s
J |	            t          |	d	                                                   d	k    s
J |	            t          |          d	k    s
J d
            |d	                                         }t          |          t          |
          k    sJ ||
f            t%          t'          ||
                    D ]0\  }\  }}|                                |                                k    sJ |||f            |                                |                                k    sJ |||f            |                                |                                cxk    r|k    sn J ||||f            |                                |                                k    sJ |||f            |                                j        |                                j        k    sJ |||f            2t/          ||||t1          |                    fdt%          |
          D             }t'          ||          D ]`\  }}|                                t           j        j        v r6t           j        j                            |                                           a|_        |S )Nc                :    g | ]}                     |          S rx   r  r  s     rv   r   z$WhileLoop.create.<locals>.<listcomp>  s'    GGG1#++A..GGGrw   c                :    g | ]}                     |          S rx   r  r  s     rv   r   z$WhileLoop.create.<locals>.<listcomp>  s'    MMMaS..q11MMMrw   r  c                (    g | ]}|j         d          S r  r  r  s     rv   r   z$WhileLoop.create.<locals>.<listcomp>  s    @@@Q16%=@@@rw   r  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r2   r   z9torch.while_loop is assumed to have at least one operand.)r  r  r  r  r  c                *   g | ]\  }}t          t          |                                |                                |                                |                                |                                j                   t          |fg          S r  r  )r   r   rI  
while_loops      rv   r   z$WhileLoop.create.<locals>.<listcomp>   s     
 
 
 6 !,,.. **,,**!,,..!,,..5   
 

 
 
rw   )rP   r   r  r~   r  r  rz   r  r  r  r  r'  r   r   rg   r   r   r   r   r   r;  r   r  r  re  r8  r  r  r  r  )r  r  r  r  r  
all_inputsfx_all_inputsfake_all_inputsr  cond_outputsbody_outputsr   r   opbor  r  r   r  s   `                 @rv   r  zWhileLoop.create  sy    HGGGGGGMMMM;LMMM#&77
,1"58L8QRT8UU@@-@@@ '* 		9 		9H~%!"!6!6,#0"*- "7 " "
 (88 9 9&HN&889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 }2}2-- 	 gXdg g   <  A%%%|%%%A((**ej888,888<?++--..!333\333 
OOaF   A))++ >""c,&7&7777.,9W777$S%F%FGG 	Q 	QKAxB;;==BKKMM111Ar2;111==??bmmoo5552r{555 ==??bmmoo?????????!RVAT???<<>>R\\^^333aR[333==??)R]]__-CCCCaR[CCCC)/!!$V,,
 
 


 
 
 
 '|44
 
 
 NG44 	@ 	@HC||~~!555 +//???$
s    CC 	#C 	c                0    |                     |            d S r   )codegen_while_loopr  s     rv   r  zWhileLoop.codegen  s    ""4(((((rw   )
r  r  r  r  r  r  r  r  r  re  )r  r  r  r  r  r  r  r  )r  rE  rF  r  rG  r  r  r  r  r%  r  r  r  r  r  s   @rv   r  r    s         04N4444377777(,M,,,,(,M,,,,+/G////) ) ) ) ) ), Z Z Z [Zx) ) ) ) ) ) )rw   r  c                  8     e Zd Z	 ddd fdZ fdZd Z xZS )rr   Nr  c          	        t                                          |||||d |           ddlm}  ||g ||R |          }	|	J |	| _        t
          j        j                            |	d           | _	        | t
          j        j        |	<   d S )N)r   r  r   )get_effect_key)
r  r%  torch._higher_order_ops.effectsr  effect_typerP   r   effectful_opsr  prev_effect_buffer)r  r  r  r  r  r  r   r  r  r  r  s             rv   r%  zEffectfulKernel.__init__   s     	/ 	 	
 	
 	
 	CBBBBB$nV-L~-L-L-LfUU&&&&"#'"7";";K"N"N-1k***rw   c                    t                                                      }| j        C|j                            t          j        | j                                                             |S r   )r  ra  r 	  rj  r  r4   r  r8  )r  r  r  s     rv   ra  zEffectfulKernel.get_read_writes=  sa    gg--//".!!$T%<%E%E%G%GHH   rw   c                    dS r  rx   r  s    rv   r  z EffectfulKernel.has_side_effectsG  r  rw   r   )r  rE  rF  r%  ra  r  r  r  s   @rv   rr   rr     sy         2 2 2 2 2 2 2 2:          rw   rr   c                  8    e Zd ZU ded<   ded<   d Zd Zd	dZdS )
rb  r{   rz   ztorch._C.ScriptObjectr   c                    | j         S r   rf  r  s    rv   r8  zTorchBindObject.get_nameP  r]  rw   c                    d S r   rx   r  s    rv   r   zTorchBindObject.get_deviceS  r  rw   Nc                    | j         S r   rf  r4  s     rv   r6  z!TorchBindObject.codegen_referenceV  r]  rw   r   )r  rE  rF  rG  r8  r   r6  rx   rw   rv   rb  rb  K  s]         III             rw   rb  c                  T    e Zd Zd Zd ZdddZedd            Zedd            ZdS )_CollectiveKernelc                    dS r[  rx   r  s    rv   r}  z!_CollectiveKernel.should_allocate[  r]  rw   c                    dS r  rx   r  s    rv   r  z"_CollectiveKernel.has_side_effects^  r  rw   Nr  r   c                l   ddl m} t          | j                  t          j        j        u s
J d            | j        }|j        j        | _	        |j        j
        | _        | j	                            dd           d| j         | _         ||          | _        d |j        j        D             | _        d S )Nr2   r  z,Setting cpp kernel needs a valid op_overloadr  r   c                *    g | ]}|j         	|j        S rx   r  r  s     rv   r   z9_CollectiveKernel.set_cpp_kernel_name.<locals>.<listcomp>o  s1     .
 .
 .
.
F.
 .
 .
rw   )r  r  rs   r  rg   r  r  r  rz   r  r  r  r  r  r  r  r  )r  r  r  r  s       rv   r  z%_CollectiveKernel.set_cpp_kernel_namec  s    666666 !""ej&;;;;9 <;;!%~2(.(D%!%!5!=!=dC!H!Hjj4Khjj..v66.
 .
"N4.
 .
 .
***rw   r  !Union[TensorBox, List[TensorBox]]r]   r^   c                   t           j        j        5   | j        ||g|R i |\  }}}}}	d d d            n# 1 swxY w Y   |	rJ | d|	             |D ]}
|
                                 |d                                          | t                    ||||          t          j        |          }j	        
                    fd|D                        j        
                    d |D                        d|v rpj	                            t          t                    |d                              j                            |d                                                    d S d S )Nr"  r   c                L    g | ] }t          t                    |          !S rx   r  )r   r  r   ri  s     rv   r   z4_CollectiveKernel.create_inplace.<locals>.<listcomp>  s-    MMM^Jv..V<<MMMrw   c                6    g | ]}|                                 S rx   rr  r  s     rv   r   z4_CollectiveKernel.create_inplace.<locals>.<listcomp>  s     "B"B"Bc3<<>>"B"B"Brw   r   )rP   r   r  r  r2  r   r7  r  tree_leavesr  r  r	  r  r  r8  )r  r  r  r~   r   r  r  r  r  r  r  inpsr   ri  s               @@rv   create_inplacez _CollectiveKernel.create_inplacey  s    W 	D 	D #"66CDCCCFCC!	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D %EE&E&E2C&E&EEEE% 	! 	!J    Q**,,v
 
 !&))&&MMMMMMMM	
 	
 	

 	!!"B"BT"B"B"BCCCF??#**z&116%=&II   %%fUm&<&<&>&>????? ?   9= =c                    t           j        j        5    j        ||g|R i |\  }}}}}	d d d            n# 1 swxY w Y   |	rJ | d|	             |D ]}
|
                                 t          |t                    rZ                     ||          }  t          |          ||||           fdt          |          D             _
        j
        S                        |          ||||          g_
        S )Nr.  c                n    g | ]1\  }}t                              |          t          |fg          2S rx   )rg  r`  rc   )r   r   r  r  ri  s      rv   r   z9_CollectiveKernel.create_out_of_place.<locals>.<listcomp>  sV        Av ((00AYK   rw   )rP   r   r  r  r2  rb   rc   r7  re  r   r  r`  )r  r  r  r~   r   r  r  r  r  r  r  r   ri  s   `           @rv   create_out_of_placez%_CollectiveKernel.create_out_of_place  s    W 	D 	D #"66CDCCCFCC!	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D %FF&F&F3D&F&FFFF% 	! 	!J    nd++ 	__[.AAFS!&)) F     "+>!:!:  FN >!S$$^44 F %XFNMr	  r   rL  )r  r	  r]   r^   )r  r	  )	r  rE  rF  r}  r  r  r  r	  r	  rx   rw   rv   r	  r	  Z  s            

 
 
 
 
, $@ $@ $@ [$@x * * * [* * *rw   r	  c                  <     e Zd Zd Zedd            Z fdZ xZS )	_WaitKernelc                   | j         d         }t          |t                    r|j         d         gS t          |t                    rB|j         d         }t          |t                    r|j        d         \  }}|j         |         gS g S g S r+  )r  rb   r	  rg  r  )r  r  collr   r   s        rv   get_volatile_readsz_WaitKernel.get_volatile_reads  s    k!nc,-- 	JqM?"[)) 	 :a=D$ 122 *Q3C())I Irw   r  rX   r]   r^   c                   t           j        j        5  |                     ||          \  }}}}}d d d            n# 1 swxY w Y   |rJ | d|              | t	          |                                          ||||          }|j                            t          t	          |                                          ||                     d S )Nr"  )	rP   r   r  r  r7  r   r  r  r  )	r  r  r  r  r  r  r  r  ri  s	            rv   create_waitz_WaitKernel.create_wait  s*   W 	0 	0 ""63//!	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 %EE&E&E2C&E&EEEEs~~''((
 
 	&&:cnn&6&677fEE	
 	
 	
 	
 	
s   ;??c                    t                                                      }|                                 }|D ]@}|j                            t          j        |                                                     A|S r   )r  ra  r	  rj  r  r4   r  r8  )r  r  volatile_readsvrr  s       rv   ra  z_WaitKernel.get_read_writes  sm    gg--//0022  	G 	GB!!,"6r{{}}"E"EFFFFrw   )r  rX   r]   r^   )r  rE  rF  r	  r  r	  ra  r  r  s   @rv   r	  r	    sg          * 
 
 
 [
*        rw   r	  r   OrderedSet[Symbol]c                L   t          | t          t          f          rt          |           S t          | t          t
          f          r't                      }| D ]}|t          |          z  }|S t          | t          j	                  rt          |           S t                      S r   )
rb   r+   r   r(   rd   rc   r-   rD  rg   r  )r   r  r   s      rv   rD  rD    s    !h%&& $Q'''	At}	%	% 	&0ll 	0 	0A,Q///AA	Au|	$	$ $Q'''||rw   )r[   r\   r]   r^   )rz   r{   r]   r|   )r   r   r]   r   )r   r   r   r   r]   r   )r   r   r]   r   )r   r   r]   r   rD  )r   r   r   r   r]   r^   )r   rY   r   r   r]   r   )r   r   r   r   r]   r   )r   r   r]   r   )r   r   r]   r   )r   r   r]   r   )r   r  r   r7  r]   rO   )r  r{   r   r7  r  r   r]   r   )r(  r)  r*  r)  r   r)  r]   r   )r   rY   r]   r   )TFNFN)r   rY   r  r   r  r   r  r  r  r   r  r  r]   r  )r   rY   r  r   r]   r   )r   r)  r'  r)  r]   r   )r   r7  r]   rq   )r  r  r]   r   )r   r   r]   r"	  (  
__future__r   rI  rH  r  r  loggingtextwrapr   r   r   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   unittest.mockr   rm   r   r   r   torch._export.serde.schema_exportserder  rC  torch._loggingrg   torch.fxtorch.utils._pytreer3  _pytreer  torch._dynamo.device_interfacer   torch._dynamo.utilsr   torch._export.serde.serializer   *torch._higher_order_ops.auto_functionalizer   torch._inductorr   torch._prims_commonr   r    r!   r"   r#   torch._subclasses.fake_tensorr$   %torch.fx.experimental.symbolic_shapesr%   r&   r'   r(   r)   r*   r+   torch.utils._ordered_setr-   torch.utils._sympy.functionsr.   r/   r0   torch.utils._sympy.symbolr1   r  r3   r4   codegen.commonr5   r6   r7   r8   r9   r:   	loop_bodyr;   ops_handlerr<   r=   runtime.benchmarkingr>   runtime.hintsr?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   virtualizedrN   rO   rP   r   rQ   rR   rS   rT   rq   rU   rG  	getLoggerr  r  r  r  r{   rZ   ry   r   r   r   r   r  r  r   r   r   r   r   r   r   rY   r3  rM  ry  r  r  r  r  r'  r7  r9  r  rC  r}  r  r   r  r  rC  r  r  rj   r  r  r)  r  r  r  r  r  r  r  r  r  ra  r  r   r,  r7  r=  r  r  r  r  r  r  r  r  r  r  r   PrimitiveInfoTyper  r1  r5  rL  rS  r  rj  rm  rA  rN  rV  r  r  rm  r  r  r  r  r  r  r  r  rk   rl   r   _embedding_bagr  _fft_c2c'_scaled_dot_product_efficient_attention#_scaled_dot_product_flash_attention#_scaled_dot_product_cudnn_attention
_scaled_mmaddmmr   bmmcopy_mmrepeat_interleaver  nonzeror-  r   view_as_real
has_c_shimr  ro  re  rg  rF  rX   r  r  r  r  r  rr   rb  r	  r	  rD  rx   rw   rv   <module>rR	     s   " " " " " " "                       " " " " " "                                       " ( ' ' ' ' '        ' ' ' ' ' ' ' ' ' ' 2 2 2 2 2 2 2 2 2 2 2 2      $ $ $ $ $ $ $ $ $ C C C C C C ( ( ( ( ( ( ? ? ? ? ? ? M M M M M M # # # # # #              : 9 9 9 9 9                  0 / / / / / L L L L L L L L L L * * * * * * " " " " " " " " D D D D D D D D                   4 4 4 4 4 4 4 4 - - - - - - ( ( ( ( ( (                                 * ) ) ) ) ) ) ) ) )  %$$$$$$WT]]WT]]WT]]CI& & & & &g!!		8?4	8	8	8y~'T  k	sDk!12K8STU	 	 	 	 	$ $ $ $>             !LL $__        
    
 
    

 .2    8      ) ) ) )
' ' ' 'aE aE aE aE aE aE aE aEH A A A A A A A AH x
 x
 x
 x
 x
F x
 x
 x
v& & & &B B B B B B B B2 
 
 
 
 
i 
 
 
8 ;|$$;y!!;y!!K;u{=))< <      JN<N <N <N <N <N~         R
 R
 R
 R
 R
 R
 R
 R
j|
 |
 |
 |
 |
y |
 |
 |
~ v
 v
 v
 v
 v
5 v
 v
 v
t 	 	 	 	 	 	 	 	 I I I I I5 I I IX   	 	 	 	 !<@=A/ / / / /d $59#44$ $ $  
    WL WL WL WL WLv WL WL WLt C C C C C C C CL ( ( ( ( (( ( ( (V79 79 79 79 79( 79 79 79t     (   : P P P P P; P P Pf P P P P Ph P P Pf % % % % % % % %PH< H< H< H< H< H< H< H<V    6   4 8 8 8 8 8| 8 8 8$ @ @ @ @ @| @ @ @   < < < < @T @T @T @T @TV @T @T @TF! ! ! ! !& ! ! !HI7 I7 I7 I7 I7V I7 I7 I7XP P P P Pf P P P2       *M* M* M* M* M* M* M* M*` u u u u uV u u up 
& 
& 
& 
& 
&fi 
& 
& 
&    &   

 
 
 
 
[ 
 
 
&- - - - -6 - - -X X X X XF X X X  4 4 4 4 4_ 4 4 4D
5
 5
 5
 5
 5
_ 5
 5
 5
p* * * * *> * * *Z #udCeCeT<Q6R1SST $$ $$ $$ $$ $$ $$ $$ $$N" " " " "| " " "
.= .= .= .= .=. .= .= .=bM M M M M M M M$        8 8 8 8 8? 8 8 8v       
L L L L L9 L L L^ ` ` ` ` `< ` ` `F 4 4 4 4 4l 4 4 4n
 
 
 
 
/ 
 
 
,&" &" &" &" &" &" &" &"R    V   .b b b b bl b b bJ)< )< )< )< )<| )< )< )<Z/ / / / /, / / /d    <   6B B B B B5 B B B"F F F F F- F F F,H) H) H) H) H)l H) H) H)V.) .) .) .) .)| .) .) .)bK K K K K K K K@- - - - -L - - -0-; -; -; -; -;< -; -; -;`        
 Z#4<0808

%	! 
(N* N* N* N* N*& N* N* N*b 
 
 
 
 
. 
 
 
<        .
 .
 .
 .
 .
, .
 .
 .
b ; ; ; ; ; ; ; ;|+ + + + +
 + + +T% T% T% T% T% T% T% T%n * * * * *v * * *L L L L u* u* u* u* u*, u* u* u*p {) {) {) {) {) {) {) {)|) ) ) ) )n ) ) )X     f   G G G G G G G GT2 2 2 2 2# 2 2 2p     rw   