
    çg                       d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ ddlmZ d dlmZ ed ed	                    Z ed
          Z  G d dej!                  Z"dUdZ# G d d          Z$d Z%i Z&dVdZ' G d dee                    Z(d Z)d Z*i dddddddd d!dd"d#d$d#d%dd&d'd(d'd)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<Z+ e,e+-                                          D ]Z.e.e+e.<    G d= d>e(e                    Z/edWdA            Z0edddddddBdXdL            Z0	 dYdddddddBdZdOZ0 G dP dQ          Z1 G dR dS          Z2dT Z3dS )[    )annotationsdivisionN)defaultdict)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTuple   )driver)
ModuleTypez.runtime.jitTc                  v     e Zd ZdZd fdZed             Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                    t                                                       || _        t          j        |                    d                    | _        || _        h d| _        i | _	        d| _
        d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr'   r,   src	__class__s       N/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/runtime/jit.pyr&   zDependenciesFinder.__init__$   st    	nSZZ%8%899 *
 *
 *
&. TV*/'''    c                4    | j                                         S N)r+   	hexdigestr0   s    r3   retzDependenciesFinder.retH   s    {$$&&&r4   c                   t          |j                  t          j        k    r|j        S |j        | j        v rd S | j                            |j        d           }|}| j        svt          |          t          k    r^t          |t                    sIt          |dd          s8|j        | j        vr*|| j        f| j        |j        t	          | j                  f<   |S )N__triton_builtin__F)typectxastStoreidlocal_namesr,   getr/   r   r$   JITFunctionr#   r-   r.   )r0   nodevals      r3   
visit_NamezDependenciesFinder.visit_NameL   s    >>SY&&7N7d&&&4ltw--
 O 7	  II++ #344 , >ESJ^`e=f=f , G4#AAABEt|ATD!47Bt|,<,<"=>
r4   c                *      fd|j         D             S )Nc                :    g | ]}                     |          S  )visit).0eltr0   s     r3   
<listcomp>z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>m   s#    555C

3555r4   )eltsr0   rD   s   ` r3   visit_TuplezDependenciesFinder.visit_Tuplej   s!     6555495555r4   c                8   |                      |j                  }t          |t          j                  r4|                      |j                  }t          |t          j                  4|t          |dd          t          k    rd S t          ||j                  S )N__name__ )rJ   valuer$   r>   	Attributer#   TRITON_MODULEattr)r0   rD   lhss      r3   visit_Attributez"DependenciesFinder.visit_Attributeo   s    jj$$cm,, 	(**SY''C cm,, 	(;73
B77=HH4sDI&&&r4   c                    fd}                      j                  }|3 ||          s(t          |t                    sJ d|j         d            t          j        |ft           j         j                   fdj	        D                       D ]%}t          |t                    s ||          r%|j
        } j                                        |j                                        z  D ]V}|\  }} j        |         \  }	}|j        |         \  }
}|	|
k    r)t          d| d|	 d j         d|j         d	|
 d
          W j                            |j                   t!          t#          |dd                    }||z   } j                            |                    d                     'd S )Nc                    t          j        j                  rdS t          | dd          }|                    t
                    S )NT
__module__rS   )inspect	isbuiltinfuncr#   
startswithrV   )r_   modulerD   s     r3   is_triton_builtinz8DependenciesFinder.visit_Call.<locals>.is_triton_builtiny   sB     ++ tT<44F$$]333r4   z
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisc              3  L   K   | ]}                     |j                  V  d S r6   )rJ   rT   )rK   kwr0   s     r3   	<genexpr>z0DependenciesFinder.visit_Call.<locals>.<genexpr>   s1      ::bTZZ!!::::::r4   Global variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )rJ   r_   r$   rC   rR   	itertoolschainmapargskeywords	cache_keyr.   keysRuntimeErrorr'   updatestrr#   r+   r*   )r0   rD   rb   r_   objfunc_cache_keykvar_name_v1v2rg   keys   ``           r3   
visit_CallzDependenciesFinder.visit_Callw   sM   	4 	4 	4 	4 	4 zz$)$$|0066|*+;
 ;
|| ^  ^  ^  ^ || ?HDJ	**::::DM:::
 
 	4 	4C
 c;//   %%  ]N *//11C4H4M4M4O4OO  !-a0A,Q/A88& T8  T  T  T  TTXT]  T  Trvr  T  T  Y[  T  T  T   
 !(()=>>>73
E::;;H 8+CKszz'2233339	4 	4r4   c                f    d |j         j         D             | _        |                     |           d S )Nc                    h | ]	}|j         
S rI   arg)rK   r~   s     r3   	<setcomp>z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>   s    >>>CG>>>r4   )rk   rA   generic_visitrO   s     r3   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s6    >>ty~>>>4     r4   c                .     fd}t          j        |j        |j        |j        r|j        gng |j                  D ]}                     |            ||j                   |j                             |j                    ||j	                   d S )Nc                    	 j         rJ d_         | D ]}|                    |           	 d_         d S # d_         w xY w)NTF)r/   rJ   )defaultsexprr0   s     r3   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sp    8::::26/$ ) )D'

4((() 38///%/7777s	   ,9 	A)
rh   ri   posonlyargsrk   vararg
kwonlyargsrJ   kw_defaultskwargr   )r0   rD   r   r~   s   `   r3   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 	8 	8 	8 	8 ?4#3TYQUQ\@dbdfjfuvv 	 	CJJsOOOOt'(((:!JJtz"""t}%%%%%r4   c                    |                      |          }t          |t                    r| xj        t	          |          z  c_        d S | j                            |           d S r6   )rJ   r$   r   rA   setadd)r0   rD   targets      r3   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sd     D!!fd## 	)F+  (((((r4   c                    t          |j                  dk    rt          d          |                     |j        d                    |                     |           d S )N   z2Simultaneous multiple assignment is not supported.r   )r   targets	TypeErrorr   r   rO   s     r3   visit_AssignzDependenciesFinder.visit_Assign   s^    t|!!
 PQQQT\!_--- 	4     r4   c                d    |                      |j                   |                     |           d S r6   r   r   r   rO   s     r3   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   4    T[))) 	4     r4   c                d    |                      |j                   |                     |           d S r6   r   rO   s     r3   	visit_ForzDependenciesFinder.visit_For   r   r4   )r   r   )rR   r\   __qualname____doc__r&   propertyr9   rF   rP   rY   rz   r   r   r   r   r   r   __classcell__r2   s   @r3   r   r      s        	 	"0 "0 "0 "0 "0 "0H ' ' X'  <6 6 6
' ' '+4 +4 +4Z! ! !
& & &@) ) )! ! !! ! !! ! ! ! ! ! !r4   r   r   rq   c                    t          | t                    r| j        S t          | t                    r| S t	          |           S r6   )r$   r<   rR   rq   repr)tys    r3   _normalize_tyr      s>    "d {	B		 	88Or4   c                      e Zd ZdZddZed	             Zed
             Zed             Zed             Z	ed             Z
ed             Zed             ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr   paraminspect.Parameterdo_not_specializeboolc                0    || _         || _        || _        d S r6   )r   _paramr   )r0   r   r   r   s       r3   r&   zKernelParam.__init__   s    !2r4   c                    | j         j        S r6   )r   r'   r8   s    r3   r'   zKernelParam.name  s    {r4   c                    | j         j        r| j         j        t          j        j        k    rdS t          | j         j                  S )NrS   )r   
annotationr]   	Parameteremptyr   r8   s    r3   r   zKernelParam.annotation  s<    {% 	)?7CTCZ)Z)Z2T[3444r4   c                    | j         }dD ]@\  }}||                    |          t          |          z   d          }|r||v r| | c S A|dk    rdS dS )N))uintu)r   ir   u1rS   )r   findr   )r0   r   ty1ty2widths        r3   annotation_typezKernelParam.annotation_type  s    _
5 	' 	'HCzs33c#hh>??@E '
**u&&&4rr4   c                    d| j         v S )N	constexpr)r   r8   s    r3   is_constexprzKernelParam.is_constexpr  s    do--r4   c                $    d| j         v o| j         S )Nconst)r   r   r8   s    r3   is_constzKernelParam.is_const  s    $/)C$2C.CCr4   c                    | j         j        S r6   )r   defaultr8   s    r3   r   zKernelParam.default  s    {""r4   c                @    | j         j        t          j        j        k    S r6   )r   r   r]   r   r   r8   s    r3   has_defaultzKernelParam.has_default"  s    {"g&7&===r4   N)r   r   r   r   r   r   )rR   r\   r   r   r&   r   r'   r   r   r   r   r   r   r   rI   r4   r3   r   r      s        LL3 3 3 3
     _  5 5 _5
   _ . . _. D D _D # # X# > > X> > >r4   r   c                    t          | d          r|                                 dz  dk    rdS t          | t                    r| dz  dk    rdS | dk    rdS dS )Ndata_ptr   r   Dr   1N)hasattrr   r$   r   )vs    r3   compute_spec_keyr   '  sg    q* 1::<<"#4#9#9s	As		 FaKK3!VV33r4   Fc                   | dS t          | t                    rdS t          | t                    rd| k    r| dk    rdS d| k    r| dk    rdS d	S t          | t                    rd
S | j        |f}t
                              |d           }|P|d         rdndt          t          |d                   	                    d          d                  z   }|t
          |<   |S )Nnonei1   i32                u64i64fp32r   *k*r   .)
r$   r   r   r    dtype	dtype2strrB   type_canonicalisation_dictrq   split)r~   r   dskress       r3   mangle_typer   7  s    
{v	C		 t	C		 s??si//5c\\cY..55	C		 	v y(#mmC&&;q6*44s.HSQRVIZIZ[^I_I_`bIc.ddC IcN
r4   c                  "    e Zd ZU ded<   ddZdS )KernelInterfacer   runr   c                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 $     j         | dd|S )NFgridwarmup)r   )rk   kwargsr   r0   s     r3   <lambda>z-KernelInterface.__getitem__.<locals>.<lambda>Y  s     xtx$T%'Y'YRX'Y'Y r4   rI   )r0   r   s   ``r3   __getitem__zKernelInterface.__getitem__S  s     ZYYYYYr4   N)r   r   )rR   r\   r   __annotations__r   rI   r4   r3   r   r   P  s9         
FFFZ Z Z Z Z Zr4   r   c                    d |                                 D             }dd l}| |||                                |j        |d}|                    |          }|S )Nc                X    i | ]'\  }}||j         j        d k    rt          |          n|(S r   )r2   rR   rq   rK   ry   rT   s      r3   
<dictcomp>z1serialize_specialization_data.<locals>.<dictcomp>^  s:    wwwWaWZ\aEO$<$G$Gc%jjjUwwwr4   r   )r'   	signature	constantsattrsoptionsry   )itemsjsonto_dict__dict__dumps)	r'   r   r   r   r   ry   r   rr   serialized_objs	            r3   serialize_specialization_datar  ]  sg    wwenetetevevwwwIKKK99u}} C ZZ__Nr4   c                   t          | j                  t          |          k    sJ g }g }g }g }g }g }t          | j                                        |          D ]"\  \  }}	}
|	j        t
          j        j        u r1|                    |           |                    d| d|            n5|                    | d|            |                    d| d|            |
j	        r|                    |           |                    |           |
j
        s|                    d|z             |
j        r|                    d|
j        z             |                    d|d|
j        rdnd	d
           $d                    d ||z   D                       }d                    d |D                       }d                    d |D                       }|                    d           d                    |          }d                    |          }d|d|d|d|d|d}d | j                                        D             }t          |d<   t          |d<   t!          ||           |d         S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    'z': z	=default_zcompute_spec_key(%s)z"%s"zmangle_type(, TrueFalse)rS   c                    g | ]}|d z   S r  rI   rK   xs     r3   rM   z2create_function_from_signature.<locals>.<listcomp>  s    MMMaTMMMr4   c                    g | ]}|d z   S r
  rI   r  s     r3   rM   z2create_function_from_signature.<locals>.<listcomp>  s    ???1a$h???r4   c                    g | ]}|d z   S r
  rI   r  s     r3   rM   z2create_function_from_signature.<locals>.<listcomp>  s    !G!G!Gq!d(!G!G!Gr4   z**excess_kwargszdef dynamic_func(z):
    return {z}, (z), (z), excess_kwargsc                Z    i | ](\  }}|j         t          j        j        ud | |j         )S )default_)r   r]   r   r   )rK   r'   r   s      r3   r   z2create_function_from_signature.<locals>.<dictcomp>  sE       D%= 1 777 	45=777r4   r   r   dynamic_func)r   
parameterszipr   r   r]   r   r   appendr   r   r   r   joinr   r   exec)sigkparams	func_argsdict_entriesconstexpr_valsnon_constexpr_valssignature_typesspecialisationsr'   spkprm   args_strdict_str	func_bodyfunc_namespaces                   r3   create_function_from_signaturer%  h  s    s~#g,,.... ILNOO 4 4 6 6@@ k k$R:*000T""" 3D 3 3T 3 3444455t55666 3D 3 3T 3 3444? 		k!!$''''%%d+++' F&&'='DEEE! k&&v0B'BCCCC&&&PRP[FhffahFhFh'ijjjjMM?_+LMMMNNIWW?????@@N!G!G4F!G!G!GHH&''' yy##Hyy&&HH(((III~~~7I7I7IKI >//11  N %0N=!)9N%& 	N### .))r4   r   r   
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8_e4m3fn
float8e4b8fp8e4b8float8_e4m3fnuzfloat8_e5m2float8e5b16fp8e5b16float8_e5m2fnuzfloat16fp16bfloat16bf16float32r   float64fp64int8i8int16i16int32r   r   u8u16u32r   )int64uint8uint16uint32uint64c                       e Zd ZdZdZed             Zed             Zd Zedd            Z	d Z
d	 Zd
 Zd Zd Z	 	 ddZed             Zd Zd Zd Zd Z fdZd Z xZS )rC   Nr   c                >   t          | d          r| j        S t          | t                    rdS t          | t                    rd| k    r| dk    rdS d| k    r| dk    rdS d	S t          | t
                    rd
S | d S t          dt          |            d|            )Nr   r   r   r   r   r   r   r   r   r   zUnsupported type z for )r   r   r$   r   r   r    r   r<   r}   s    r3   _key_ofzJITFunction._key_of  s    3   	G9T"" 	G4S!! 	G33)#3#3u##"2"2uuU## 	G6[4ES		EEEEFFFr4   c                    t          | d          r%|                                 t          j        z  dk    S t	          | t
                    r| dz  dk    | dk    fS | d u fS )Nr   r   r   r   r   r   rC   divisibilityr$   r   r}   s    r3   _spec_ofzJITFunction._spec_of  sd    3
## 	-<<>>K$<<AAS!! 	-"HM3!8,,tr4   c                    ddl m} d fdt          | j        |          D             }d t          | j        |          D             } |t	          |          t	          |                    S )Nr   )AttrsDescriptorc                    t          | d          r%|                                 t          j        z  dk    S t	          | t
                    r| t          j        z  dk    S | dS dS )Nr   r   TFrL  )r  s    r3   is_divisible_by_16z3JITFunction._get_config.<locals>.is_divisible_by_16  sc    q*%% 9zz||k&>>!CCAs## 9;33q88yt5r4   c                H    h | ]\  }} |          |j         |j        S rI   )r   r   )rK   r   r~   rR  s      r3   r   z*JITFunction._get_config.<locals>.<setcomp>  sM     
 
 
s!!#&&
 05/F
I
 
 
r4   c                    h | ]C\  }}t          |t                    r)t          |t                    s|d k    5|j        <|j        DS )r   )r$   r   r   r   r   )rK   r   r~   s      r3   r   z*JITFunction._get_config.<locals>.<setcomp>  sa     
 
 
s#s##
 -7sD,A,A
 GJQhhW\Wnh IFNhhr4   )compilerrP  r  paramstuple)r0   rk   rP  divisible_by_16
equal_to_1rR  s        @r3   _get_configzJITFunction._get_config  s    ......	 	 	
 
 
 
!$+t44
 
 


 
!$+t44
 
 

 u_55uZ7H7HIIIr4   Fc                    | dS t          | t                    r| S t          |                               d          d         }t          |         }|rdnd}||z   S )N*i8r   r   r   r   )r$   rq   r   r   )ry   r   	dtype_str	const_strs       r3   _type_ofzJITFunction._type_of  sd     ;5S!! 	JHHNN3''+	.y9	$-DD#	9$$r4   c                J    t          t          | j        |                    }|S r6   )dictr  
constexprs)r0   constexpr_keyr   s      r3   _make_constantszJITFunction._make_constants  s!    T_m<<==	r4   c                   t           j        dS | j        j        }| j        j        }d                    d t          | j        |d                   D                       }	| d|j         d|j	         d|j
         d|j         d	|	 d
}
 G d d          }t          ||||d         ||          }||||j        |j	        |j
        |j        |j        ||d
}t                               ||
 ||||           d|i|dd          S )NFr  c                ,    g | ]\  }}|j          d | S )z: r'   )rK   r   r   s      r3   rM   z*JITFunction._call_hook.<locals>.<listcomp>"  s,    ___%*4444___r4   r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=](r  c                      e Zd Zd ZdS )/JITFunction._call_hook.<locals>.JitFunctionInfoc                0    || _         || _        || _        d S r6   )ra   r'   jit_function)r0   ra   r'   rl  s       r3   r&   z8JITFunction._call_hook.<locals>.JitFunctionInfo.__init__'  s    $ 	$0!r4   N)rR   r\   r   r&   rI   r4   r3   JitFunctionInforj  %  s#            r4   rm  r   )
r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionextern_libsconfigsspecialization_datary   )ry   r   fncompileis_manual_warmupalready_compiled)rC   
cache_hookrv  rR   r\   r  r  rV  ro  rp  rq  rr  r  rs  )r0   ry   r   rn  r   r   rt  r'   ra   	arg_reprsr   rm  ru  r   s                 r3   
_call_hookzJITFunction._call_hook  s    !)5w#II__c$+WZ[\W]F^F^___``	  p  p7#4  p  pAQ  p  p`g`r  p  p  HO  H`  p  p  dm  p  p  p	 	 	 	 	 	 	 	 <D)YX_`aXbdkmpqq #" *(!, ' 8".#6
 
 %%vtT22C*6*"" & 
 
 	
r4   c                \    t          |          sJ | j                            |           dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr  )r0   hooks     r3   add_pre_run_hookzJITFunction.add_pre_run_hookE  s3    
 ~~!!$'''''r4   c                f   ddl m}m}m}m} || _        || _        || _        || _        t          | j        | j                  | _        d t          | j                  D             | _
        d t          | j                  D             | _        d t          | j                  D             | _        dS )z1
        Precompute as much as possible.
        r   )CompiledKernelrw  	ASTSourcemake_backendc                &    g | ]\  }}|j         |S rI   r   rK   r   ps      r3   rM   z-JITFunction.create_binder.<locals>.<listcomp>W  s#    ![![![AAN![!![![![r4   c                &    g | ]\  }}|j         |S rI   r  r  s      r3   rM   z-JITFunction.create_binder.<locals>.<listcomp>X  s%    %c%c%cFQTUTb%ca%c%c%cr4   c                4    g | ]\  }}|j         |j        |S rI   )r   r   r  s      r3   rM   z-JITFunction.create_binder.<locals>.<listcomp>Y  s?     $
 $
 $
1a1;N$
YZYg$
$
 $
 $
r4   N)rU  r  rw  r  r  r%  r   rV  binder	enumerateconstexpr_indicesnon_constexpr_indicesspecialised_indices)r0   r  rw  r  r  s        r3   create_binderzJITFunction.create_binderM  s     	POOOOOOOOOOO,"(4T^T[QQ![![)DK2H2H![![![%c%ci6L6L%c%c%c"$
 $
%dk22$
 $
 $
   r4   c               <   & t           j                                        }t           j                            |          } j        |d<    j        D ]
} ||i |  j                                            j        |i |\  }}	}
}}d                    |	          t          |
|f          z   } j
        |                             |d           }|t           j                                        }                     |          }|                    |          }d|vs
J d            d|vs
J d            d|vs
J d            |D ]}||j        vrt!          d	|z            t#          |                                          } fd
 j        D             }|	d t)          |                   }d t+          ||          D             }  j        | f&&fdt+          | j                  D             }|                                D ]'\  }}t3          |          rt5          d| d          (                     |||||&          rd S                       ||&d                   }                     |||j                  }| j
        |         |<   t=                      } j                                        D ]?\  \  }}\  }}|                    ||          x} |k    rtA          d| d| d|            @|s|J t3          |          r ||          }t)          |          }!|d         }"|!dk    r|d         nd}#|!dk    r|d         nd}$ |j!        ||g|R  }% |j"        |"|#|$||j#        |j$        |% j%        j&         j%        j'        g	|R   |S )NdebugrS   device_typez=device_type option is deprecated; current target will be usedrn  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                4    g | ]}j         |         j        S rI   )rV  r'   )rK   r   r0   s     r3   rM   z#JITFunction.run.<locals>.<listcomp>  s"    OOOqt{1~*OOOr4   c                *    i | ]\  }}||d k    rdn|S )r   r\  rI   )rK   rt   r   s      r3   r   z#JITFunction.run.<locals>.<dictcomp>  s*    ```AqF{{UU```r4   c                `    i | ]*\  }}|j         s|j        d          j        v s|"|j        |+S )r   )r   r   rY  r'   )rK   r   r  rt  s      r3   r   z#JITFunction.run.<locals>.<dictcomp>  sM       Q> &'Ugaj.C%C%Cqy GPyyr4   zCallable constexpr at index z is not supportedr   )r   r   rf   z1 has changed since we compiled this kernel, from z to r   r   )(r   activeget_current_deviceget_current_streamr  r  r  r  r  rq   cacherB   get_current_targetr  parse_optionsr   KeyErrorrW  valuesr  r   r  rZ  rV  r   r~  r   r|  r  rw  objectr.   ro   launch_metadatar   functionpacked_metadatar  launch_enter_hooklaunch_exit_hook)'r0   r   r   rk   r   rn  r  r  
bound_argssig_and_specr  r  excess_kwargsry   kernelr   backendr   rt   
bound_valssigkeyssigvalsr   r   r   r~   r1   not_presentr'   globals_dict_idrE   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  rt  s'   `                                     @r3   r   zJITFunction.run]  s   113311&99*w & 	" 	"DD$!&!!!!;   VaVZVacgVrkqVrVrS
L.2Dm ggl##c>=*I&J&JJF#''T22>]5577F''//G++F33G !...0o...6)))+e)))6)))+e)))" ] ]G,,,"#WZ[#[\\\ - z002233J POOOD4NOOOG"=CLL=1G``#gW^J_J_```I't'47G   !*dk::  I
 $//++ Y Y3C== Y#$W1$W$W$WXXXY sIvy'7SS t..y)WQZHHC\\( "  F
 '-DJvs# hh<@<Q<W<W<Y<Y 	q 	q8#T?%8c<&**4===#EE"otoo^aoogmooq q q F  	y###~~ ( tJ''D		I!WF )AT!WW1F )AT!WW1F 5f4T6WDVWWWOFJvvvvvH^`o*<d>Q>byewy y y yr4   c                L   |r|ng }| _         j        | _        || _        t	          j                  | _        || _        t	          j                  d         | _        fd| _	        || _
        d | _        g | _        t          | j        j                                                  D ]=\  }}	|o||v p|	j        |v }
| j                            t%          ||	|
                     >t'          j        t	          j                            | _        | j        t/          j        d| j        t.          j                                                  d          | _        t7          t8                    | _        d | _        i | _        d | _         tB          j"        #                    dd          dk    rdn|| _$        || _%        d | j        D             | _&        d	 | j        D             | _'        g | _(        j)        | _)        j*        | _*        j+        | _+        j        | _        d S )
Nr   c                ,    j         n
 |           S r6   )rR   )rv   rv  r   s    r3   r   z&JITFunction.__init__.<locals>.<lambda>  s    T\bkkttAww r4   z^def\s+\w+\s*\(TRITON_DEBUG0r   Tc                    g | ]	}|j         
S rI   rg  rK   r  s     r3   rM   z(JITFunction.__init__.<locals>.<listcomp>  s    666Q!&666r4   c                *    g | ]}|j         	|j        S rI   )r   r   r  s     r3   rM   z(JITFunction.__init__.<locals>.<listcomp>  s!    HHHQH15HHHr4   ),rv  r\   ra   versionr]   r   r   getsourcelinesstarting_line_numberr   r  r  rV  r  r  r  r'   r  r   textwrapdedent	getsourcer1   research	MULTILINEstartr   ra  r  hashr.   r  osenvironrB   r  rg   	arg_namesrb  r  r   rR   __globals__)r0   rv  r  r   r  rg   r   r  r   r   dnss    `    `    r3   r&   zJITFunction.__init__  s   1BJ--m *2..!2$+$:2$>$>q$A!FFFFF	.!$.";"B"B"D"DEE 	; 	;HAu#c.?)?)b5:QbCbCK{1eS99:::: ?7#4R#8#8998BI&8$(BLQQWWYYZZ[ &&
	 TV Z^^NC@@CGGTTU
  76$+666HH$+HHH   z>-r4   c                `   | j         t          | j        | j        | j                  }|                    |                                            |j        t          | j	                  z   | _         t          t          |j                                                            | _        | j         S )N)r'   r,   r1   )r  r   rR   r  r1   rJ   parser9   rq   r  ra  sortedr.   r   )r0   dependencies_finders     r3   rm   zJITFunction.cache_key  s     9"4$-QUQagkgo"p"p"p%%djjll333+/#d6O2P2PPDI$(0C0T0Z0Z0\0\)])]$^$^D!yr4   c               R     | j         t          t          j        |          |dd|S )NTr   )r   rj   
MockTensor
wrap_dtype)r0   r   rk   r   s       r3   r   zJITFunction.warmup  s.    txZ5JD1Q1QT$\\U[\\\r4   c           	        ddl m}m}m} dd l}dd lm t          j        	                                }|
                    |          }|d         | j        j        k    r%t          d|d          d| j        j                   fd|d                                         D             }t          |d	                                                   }	 || |	||                    |d
                             }
d |d                                         D             }|d         } ||
d |          }|| j        |         |<   |S )Nr   )rP  rw  r  r   r'   zSpecialization data is for z but trying to preload for c                z    i | ]7\  }}|j                             |          r                     |          n|8S rI   )r   is_dtype)rK   ry   rT   tls      r3   r   z'JITFunction.preload.<locals>.<dictcomp>  sR     
 
 
U BH$5$5e$<$<G%%
 
 
r4   r   r   r   c                b    i | ],\  }}|t          |t                    rt          |          n|-S rI   )r$   r   rW  r   s      r3   r   z'JITFunction.preload.<locals>.<dictcomp>  sG     
 
 
U E4!8!8Cue
 
 
r4   r   ry   )rU  rP  rw  r  r   triton.languagelanguager   r  r  loadsrv  rR   ro   r   ra  	from_dictr  )r0   ru  rP  rw  r  r   rn  deserialized_objr   r   r1   r   ry   r  r  s                 @r3   preloadzJITFunction.preload  s   BBBBBBBBBB$$$$$$1133::&9::F#tw'777u.>v.Fuucgcjcsuuw w w
 
 
 
.{;AACC
 
 
	 )+6<<>>??	iiO4M4MN^_fNg4h4hii
 
.y9??AA
 
 
 u%dG,,"(
63r4   c                    t          j        | j                  }t          |t           j                  sJ t          |j                  dk    sJ t          |j        d         t           j                  sJ |S )Nr   r   )r>   r  r1   r$   Moduler   bodyFunctionDef)r0   trees     r3   r  zJITFunction.parse  sg    y""$
+++++49~~""""$)A,88888r4   c                     t          d          )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)ro   )r0   rk   r   s      r3   __call__zJITFunction.__call__   s    WXXXr4   c                x    t          t          |                               ||           |dk    r	d | _        d S d S )Nr1   )r%   rC   __setattr__r  )r0   r'   rT   r2   s      r3   r  zJITFunction.__setattr__#  s@    k4  ,,T5999 5==DIII =r4   c                2    d| j          d| j        j         dS )NzJITFunction(:r  )ra   rv  rR   r8   s    r3   __repr__zJITFunction.__repr__*  s"    ?dk??DG,<????r4   F)NNNNNN)rR   r\   r   rz  rM  staticmethodrJ  rN  rZ  r_  rd  r|  r  r  r   r&   r   rm   r   r  r  r  r  r  r   r   s   @r3   rC   rC     s       JLG G \G&   \J J J8 
% 
% 
% \
%  /
 /
 /
b( ( (
 
 
 X X Xt bf!%8( 8( 8( 8(t   X] ] ]  6  Y Y Y    @ @ @ @ @ @ @r4   rC   rv  JITFunction[T]c                    d S r6   rI   )rv  s    r3   jitr  3  s    Cr4   r  r   r  r   r  rg   r   Optional[Callable]r  r   Optional[Iterable[int]]r  Optional[bool]rg   Callable[[T], JITFunction[T]]c                    d S r6   rI   r  s         r3   r  r  8  s	     Cr4   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c               B    dfd}|  ||           S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    rv  r   r   r  c           	         t          |           sJ t          j        dd          dk    rddlm}  ||           S t          |           S )NTRITON_INTERPRETr  r   r   )InterpretedFunction)r  r   r  rg   r   r  )r~  r  getenvinterpreterr  rC   )rv  r  r  r   r  rg   r   r  s     r3   	decoratorzjit.<locals>.decoratora  s|    ||9'--44888888&&r***"3! /   r4   Nrv  r   r   r  rI   )rv  r  r   r  r   r  rg   r  s    `````` r3   r  r  E  s\    8            
~y}} r4   c                  D    e Zd ZdZed             Zd Zed             ZdS )r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                Z    | j         j        dk    r| j        dk    rt          |           S | S )Nr   torch)r2   rR   r\   r  r}   s    r3   r  zMockTensor.wrap_dtype  s/    =!W,,71J1Jc??"
r4   c                    || _         d S r6   r   )r0   r   s     r3   r&   zMockTensor.__init__  s    


r4   c                     dS )Nr   rI   rI   r4   r3   r   zMockTensor.data_ptr  s    qr4   N)rR   r\   r   r   r  r  r&   r   rI   r4   r3   r  r  }  sc         
   \
     \  r4   r  c                  @    e Zd Zd Zd Zd ZddZd Zd Zd	 Z	d
 Z
dS )TensorWrapperc                t    || _         || _        |j        | _        |j        | _        | j        j        | _        d S r6   )r   basedatarn  shape)r0   r	  r   s      r3   r&   zTensorWrapper.__init__  s1    
	I	kY_


r4   c                4    | j                                         S r6   )r	  r   r8   s    r3   r   zTensorWrapper.data_ptr  s    y!!###r4   c                6    | j                             |          S r6   )r	  stride)r0   r   s     r3   r  zTensorWrapper.stride  s    y"""r4   r   rq   c                (    d| j          d| j         dS )NzTensorWrapper[rh  r  )r   r	  r8   s    r3   __str__zTensorWrapper.__str__  s    :
::di::::r4   c                4    | j                                         S r6   )r	  element_sizer8   s    r3   r  zTensorWrapper.element_size  s    y%%'''r4   c                Z    t          | j                                        | j                  S r6   )r  r	  cpur   r8   s    r3   r  zTensorWrapper.cpu  s    TY]]__dj999r4   c                D    | j                             |j                    d S r6   )r	  copy_)r0   others     r3   r  zTensorWrapper.copy_  s    	
#####r4   c                \    t          | j                            |          | j                  S r6   )r  r	  tor   )r0   rn  s     r3   r  zTensorWrapper.to  s"    TY\\&114:>>>r4   Nr   rq   )rR   r\   r   r&   r   r  r  r  r  r  r  rI   r4   r3   r  r    s        % % %$ $ $# # #; ; ; ;( ( (: : :$ $ $? ? ? ? ?r4   r  c                   t          | t                    r,|| j        j        k    r| j        S t          | j        |          S t	          | d          rt          | |          S t          dt          |            d          )Nr   zCannot reinterpret a r   )r$   r  r	  r   r   r   r<   )tensorr   s     r3   reinterpretr    s    &-(( AFK%%%; !e444		$	$ AVU+++?V???@@@r4   r  r  r   )r   r  r  r  r   r  r  r  rg   r  r   r  r6   )rv  r  r   r  r  r  r   r  r  r  rg   r  r   r  )4
__future__r   r   r>   r(   r]   rh   r  r  r  collectionsr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   runtime.driverr   typesr   rR   r   rV   r   NodeVisitorr   r   r   r   r   r   r   r  r%  r   r   r  r   rC   r  r  r  r  rI   r4   r3   <module>r%     sy   , , , , , , , , 



       				 				  # # # # # # % % % % % % d d d d d d d d d d d d d d d d d d d d d d d d # # # # # #      .33~..../GCLLQ! Q! Q! Q! Q! Q! Q! Q!r   +> +> +> +> +> +> +> +>\
 
 
 	   2	Z 	Z 	Z 	Z 	Zgaj 	Z 	Z 	Z  >* >* >*B
D)  :	
 Y ) y 7 : z v  v v D  U!" U#$ -   2 
(//11	2	2 & &A$%q!!e@ e@ e@ e@ e@/!$ e@ e@ e@Z 
   
 
 #*.15 #	 	 	 	 	 
	 0 #*.15 #0 0 0 0 0 0p       (? ? ? ? ? ? ? ?>A A A A Ar4   