
    çg                        d dl Z d dlmZ d dlZd dlZd dlZd dlmZ	 d dl
mZ ddlmZ d dlmZ ddlmZ dd	lmZ  G d
 d          Z G d d          Z ed           G d d                      Zd Zd Zd Zd Zd Z ej        eej        g          Z ej        eej         g          Z! ej        eej"        g          Z# G d d          Z$ G d d          Z%d Z&d Z'd Z( G d d           Z) G d! d"e)          Z* G d# d$e)          Z+d% Z,d& Z-d' Z.d( Z/ e%            Z0g d)Z1 G d* d+          Z2 G d, d-          Z3dS ).    N)Tuple)	dataclass   )InterpreterError)partial   )interpreter)irc                   ,    e Zd Zd Zd Zd Zd Zd ZdS )TensorHandlec                 0    || _         || _        i | _        dS )a  
            data: numpy array
            dtype: triton type, either pointer_type or scalar_type.
            we don't store block_type here because the shape information is already availale in the data field
            attr: a dictionary of attributes
        N)datadtypeattr)selfr   r   s      V/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/runtime/interpreter.py__init__zTensorHandle.__init__   s     	
			    c                 N    t          | j                                                  S N)boolr   allr   s    r   __bool__zTensorHandle.__bool__   s    DIMMOO$$$r   c                 b    | j         }t          |d          r|j        }t          |d          |S )N
element_ty)r   hasattrr   )r   r   s     r   get_element_tyzTensorHandle.get_element_ty    s<    
e\** 	%$E e\** 	%r   c                 Z    t          | j                                        | j                  S r   )r   r   copyr   r   s    r   clonezTensorHandle.clone&   s     DINN,,dj999r   c                     || j         |<   d S r   )r   )r   keyvalues      r   set_attrzTensorHandle.set_attr)   s    	#r   N)__name__
__module____qualname__r   r   r   r!   r%    r   r   r   r      s_        	 	 	% % %  : : :    r   r   c                       e Zd Zd Zd ZdS )BlockPointerHandlec                 Z    || _         || _        || _        || _        || _        || _        d S r   )baseshapestridesoffsetstensor_shapeorder)r   r-   r.   r/   r0   r1   r2   s          r   r   zBlockPointerHandle.__init__/   s1    	
(


r   c                    | j                                         }|j        dz  }| j        }t	          j        | j         j        | j                  }t	          j        | j        t                    }t          t          |                    D ]}dgt          |          z  }||         ||<   | j        |         j        t	          j        ||                   z                       |          }	|||	z  | j        |         j        z                      t          j                  z   }||v r)t	          j        ||	| j        |         j        k               }t'          || j         j        j                  }||fS )N   r   r   )r-   r   primitive_bitwidthr1   npbroadcast_tor   onesr   rangelenr0   arangereshaper/   astypeuint64logical_andr.   r   r   scalar)
r   boundary_checkdtype_ttn_bytesr1   ptrsmasksdim
bcast_dimsoffs
             r   materialize_pointersz'BlockPointerHandle.materialize_pointers7   sF   9++---2(ty~t/@AA)666\**++ 	J 	JCs<000J*3/JsO<$)BIl36G,H,HHQQR\]]C7S=4<+<+AAII")TTTDn$$ucDJsO4H.HIID$)/"899U{r   N)r&   r'   r(   r   rJ   r)   r   r   r+   r+   -   s2              r   r+   T)frozenc                       e Zd ZU dZeed<   dZeed<   dZe	ed<   dZ
eed<   dZeed<   d	Ze	ed
<   dZee	         ed<   dZeed<   dS )InterpreterOptionsNextern_libsFdebugarchTallow_fp8e4nvallow_fp8e4b15tf32default_dot_input_precision)rS   tf32x3ieeeallowed_dot_input_precisionsr   max_num_imprecise_acc_default)r&   r'   r(   rN   dict__annotations__rO   r   rP   strrQ   rR   rT   rW   r   rX   intr)   r   r   rM   rM   H   s         KE4D#M4ND'----/I %*III)*!3*****r   rM   c                     | t           j        k    rt           j        S | t           j        k    rt           j        S | t           j        k    rt           j        S | t           j        k    rt           j        S | S r   )	r7   uint8int8uint16int16uint32int32r?   int64r5   s    r   _get_signed_np_dtypere   T   sW    w	x	x	xLr   c                 
   t          | t          j                  rt          j        t          j                  S i t          j        t          j        t                    t          j        t          j        t          j                  t          j	        t          j        t          j	                  t          j
        t          j        t          j
                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  i}t          | t          j                  rJt          | j        t          j                  rt          j        t          j                  S || j                 S ||          S r   )
isinstancetlpointer_typer7   r   r?   int1r   float16float32float64r_   r^   ra   r`   rc   rb   rd   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )tt_dtypenp_typess     r   _get_np_dtyperw   `   sX   (BO,, #x	"""
$

BHRZ(( 	
BHRZ(( 	
BHRZ((	
 	"'"" 	"(28$$ 	"(28$$ 		28BI&& 	"(28$$ 		28BI&& 	"(28$$ 		28BI&& 	RXbi((  	RXbh''!" 	**#$ 	rx))%& 	rx))'( 	**) H, (BM** -h)2?;; 	'8BI&&&+,,Hr   c                    t          t          d|j                   }t          t          d|j                   }t          j        |                                 |          }||j        dz
  z	  dz  }|j        |j        z
  dz
  }|j        |j        z
  dz
  }	|d|j        z  dz
  z  }
|j        }|j        }||j        z	  d|z  dz
  z                      t          j                  }|dk    }t          j	        |          rt          j
        |t          j                  }t          |j                  D ]}|
|z	  dz  }|j        |z
  ||dk    <   |
dk    }d||         z
  ||<   ||z
  |||z  <   |
|         ||         z  d|j        z  dz
  z  |
|<   t          j        dt          j        ||z
  |z   d|	z  dz
                      }|                    |          }|                    |          }|j        |j        k    rl|
|j        |j        z
  z	  d|j        z  dz
  z  }|t          j        j        k    r!|
d|j        |j        z
  dz
  z  z  }||dk    z   }|                    |          }n3|
                    |          |j        |j        z
  z  d|j        z  dz
  z  }|dk    }t          j	        |          r||j        z	  d|z  dz
  z                      t          j                  }|dk    }||z  }t          j
        |t          j                  }d|z
  ||         |z
  z
  ||<   ||         ||         z	  d|j        ||         z
  z  z  ||<   ||j        dz
  z  ||j        z  z  |z  }|                    | j                  S )Nuintr5   r   r   )getattrr7   r6   
frombuffertobytesfp_mantissa_widthexponent_biasr>   rc   any
zeros_liker:   maximumminimum_irROUNDING_MODERTNEr=   r.   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r   _convert_floatr      s   r#J+*H#J#JKK %ML,K%M%MNNemmoo5EFFFI+81<=ED&9K<YY\]](;l>\\_``[%B BaGHK*J,Kk;;FZAZ^_@_`hhikiqrrH!mO	vo 6
 -	:::{455 	H 	HA%*d2I&1&Ca&GGIN##!,!1$%(@$@!=G+=U'/9:(3O(DP_H`(`+//14(6O$ jBJ:0E0SWX\qWquvVv$w$wxxO%,,-?@@O++011K%(GGG)k.KlNl.lm,00A57C-222!Q;+H<Ki+ilm+m%noG!3w{!C/667IJJ)001CDD+=@]]_#$(F#F!"KM &*O	vo L
 +"??QJ^E^bcDcdllmomuvv"*a-),CCirx888"#k/h6OR\6\!]o/A//RV[\kVl/l,053IIJ/L?+l=AB<99;=OPF>>%+&&&r   c                 *    t          j        |           S r   )matherfxs    r   _erfr      s    8A;;r   c                 F    t          |           t          |          z  dz	  S )N@   )r\   )abs     r   
_umulhi_64r      s     FFSVVO""r   )otypesc                   $    e Zd Zed             ZdS )ExtraFunctionsc                 `    t          j        |                    | j        ||          |          S r   )rh   tensorcreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding_builders       r   _convert_custom_typesz$ExtraFunctions._convert_custom_types   s*    y11%,H\]]_efffr   N)r&   r'   r(   staticmethodr   r)   r   r   r   r      s2        g g \g g gr   r   c                      e Zd Zej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j	        ej        j	        iZ
ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        i
ZddZd Zd Zd Zd Zd Zd	 Zd
 Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4d  Z5d! Z6d" Z7d# Z8d$ Z9d% Z:d& Z;d' Z<d( Z=d) Z>d* Z?d+ Z@d, ZAd- ZBd. ZCd/ ZDd0 ZEd1 ZFd2 ZGd3 ZHd4 ZId5 ZJd6 ZKd7 ZLd8 ZMd9 ZNd: ZOd; ZPd< ZQd= ZRd> ZSd? ZTd@ ZUdA ZVdB ZWdC ZXdD ZYdE ZZdF Z[dG Z\dH Z]dI Z^dJ Z_dK Z`dL ZadM ZbdN ZcdO ZddP ZedQ ZfdR ZgdS ZhdT ZidU ZjdV ZkdW ZldX ZmdY ZndZ Zod[ Zpd\ Zqd] Zrd^ Zsd_ Ztd` Zuda Zvdb Zwdc Zxdd Zyde Zzdf Z{dg Z|dh Z}di Z~dj Zdk Zdl Zdm Zdn Zdo Zdp Zdq Zdr Zds Zdt Zdu Zdv Zdw Zdx Zdy Zdz Zd{ Zd| Zd} Zd~ Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )InterpreterBuilderreturnNc                 p    d | _         t                      | _        i | _        t          j        | j        d<   d S )Nconvert_custom_types)rP   rM   optionscodegen_fnsr   r   r   s    r   r   zInterpreterBuilder.__init__   s5    	)++3A3W/000r   c                     || j         d         k     st          d          || j         d         k     st          d          || j         d         k     st          d          |||f| _        d S )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzs       r   set_grid_idxzInterpreterBuilder.set_grid_idx   sv    4=###/0004=###/0004=###/000Aq	r   c                     |||f| _         d S r   )r   )r   nxnynzs       r   set_grid_dimzInterpreterBuilder.set_grid_dim   s    Rr   c                     t           j        S r   )rh   rk   r   s    r   get_half_tyzInterpreterBuilder.get_half_ty   
    zr   c                     t           j        S r   )rh   rn   r   s    r   get_bf16_tyzInterpreterBuilder.get_bf16_ty  
    {r   c                     t           j        S r   )rh   rl   r   s    r   get_float_tyzInterpreterBuilder.get_float_ty  r   r   c                     t           j        S r   )rh   rm   r   s    r   get_double_tyz InterpreterBuilder.get_double_ty	  r   r   c                     t           j        S r   )rh   r_   r   s    r   get_int8_tyzInterpreterBuilder.get_int8_ty  s	    wr   c                     t           j        S r   )rh   r^   r   s    r   get_uint8_tyzInterpreterBuilder.get_uint8_ty  	    xr   c                     t           j        S r   )rh   ra   r   s    r   get_int16_tyzInterpreterBuilder.get_int16_ty  r   r   c                     t           j        S r   )rh   r`   r   s    r   get_uint16_tyz InterpreterBuilder.get_uint16_ty  
    yr   c                     t           j        S r   )rh   rc   r   s    r   get_int32_tyzInterpreterBuilder.get_int32_ty  r   r   c                     t           j        S r   )rh   rb   r   s    r   get_uint32_tyz InterpreterBuilder.get_uint32_ty  r   r   c                     t           j        S r   )rh   rd   r   s    r   get_int64_tyzInterpreterBuilder.get_int64_ty  r   r   c                     t           j        S r   )rh   r?   r   s    r   get_uint64_tyz InterpreterBuilder.get_uint64_ty!  r   r   c                     t           j        S r   )rh   rq   r   s    r   get_fp8e4nv_tyz!InterpreterBuilder.get_fp8e4nv_ty$  
    }r   c                     t           j        S r   )rh   rs   r   s    r   get_fp8e4b15_tyz"InterpreterBuilder.get_fp8e4b15_ty'  
    ~r   c                     t           j        S r   )rh   rr   r   s    r   get_fp8e4b8_tyz!InterpreterBuilder.get_fp8e4b8_ty*  r   r   c                     t           j        S r   )rh   ro   r   s    r   get_fp8e5_tyzInterpreterBuilder.get_fp8e5_ty-  r   r   c                     t           j        S r   )rh   rp   r   s    r   get_fp8e5b16_tyz"InterpreterBuilder.get_fp8e5b16_ty0  r   r   c                 ,    t          j        ||          S r   )rh   ri   )r   elt_ty
addr_spaces      r   
get_ptr_tyzInterpreterBuilder.get_ptr_ty3  s    vz222r   c                 ,    t          j        ||          S r   )rh   rt   )r   r   r.   s      r   get_block_tyzInterpreterBuilder.get_block_ty6  s    }UE***r   c                 t    t          t          j        |gt          j                  t          j                  S Nr5   )r   r7   arraybool_rh   rj   r   r$   s     r   get_int1zInterpreterBuilder.get_int19  s'    BHeWBH===rwGGGr   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   r^   rh   r   s     r   	get_uint8zInterpreterBuilder.get_uint8<  '    BHeWBH===rxHHHr   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   r_   rh   r   s     r   get_int8zInterpreterBuilder.get_int8?  s'    BHeWBG<<<bgFFFr   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   r`   rh   r   s     r   
get_uint16zInterpreterBuilder.get_uint16B  '    BHeWBI>>>	JJJr   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   ra   rh   r   s     r   	get_int16zInterpreterBuilder.get_int16E  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rb   rh   r   s     r   
get_uint32zInterpreterBuilder.get_uint32H  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rc   rh   r   s     r   	get_int32zInterpreterBuilder.get_int32K  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   r?   rh   r   s     r   
get_uint64zInterpreterBuilder.get_uint64N  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rd   rh   r   s     r   	get_int64zInterpreterBuilder.get_int64Q  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rk   rh   r   s     r   get_fp16zInterpreterBuilder.get_fp16T  '    BHeWBJ???LLLr   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rl   rh   r   s     r   get_fp32zInterpreterBuilder.get_fp32W  r  r   c                 t    t          t          j        |gt          j                  t          j                  S r   )r   r7   r   rm   rh   r   s     r   get_fp64zInterpreterBuilder.get_fp64Z  r  r   c                 f    t          t          j        dgt          |                    |          S Nr   r5   )r   r7   r   rw   )r   types     r   get_null_valuez!InterpreterBuilder.get_null_value]  s+    BHaSd0C0CDDDdKKKr   c                     | j         t          d          t          t          j        | j         |         gt          j                  t          j                  S )Nzgrid_idx is Noner5   )r   r   r   r7   r   rc   rh   r   axiss     r   create_get_program_idz(InterpreterBuilder.create_get_program_ida  sF    = /000BHdmD&9%:"(KKKRXVVVr   c                     t          t          j        | j        |         gt          j                  t
          j                  S r   )r   r7   r   r   rc   rh   r  s     r   create_get_num_programsz*InterpreterBuilder.create_get_num_programsf  s/    BHdmD&9%:"(KKKRXVVVr   c                     t          t          j        |j        t                    t
          j                  }d }|                     ||||||          S r   )r   r7   	ones_liker   r   rh   rj   create_masked_load)r   ptr_0_1is_volatilemaskothers          r   create_loadzInterpreterBuilder.create_loadj  sG    BL>>>HH&&sD%RMMMr   c                     t          t          j        |j        t                    t
          j                  }|                     |||d d           S r   )r   r7   r$  r   r   rh   rj   create_masked_store)r   r&  valr'  r(  r*  s         r   create_storezInterpreterBuilder.create_storeo  s@    BL>>>HH''S$dCCCr   c                 
   |                                 }t          |          }|)t          t          j        |j        |          |          }t          j        |j        |j        |j        |          }	t          |	|          S r   )r   rw   r   r7   r   r   _interpreterload)
r   rE   r*  r+  cache_modifiereviction_policyr)  rC   dtype_nprets
             r   r%  z%InterpreterBuilder.create_masked_loads  sq    &&(( **= ty!I!I!I8TTE	49ej(KKC***r   c                 L    t          j        |j        |j        |j                  S r   )r2  storer   )r   rE   r$   r*  r4  r5  s         r   r.  z&InterpreterBuilder.create_masked_store{  s    !$)UZCCCr   c                    |j         j        }|j        }|t          j        k    r|t          j        k    s |t          j        k    r\|t          j        k    rLt          |j        ||d                               t          |                    }t          ||j                  S t          |j        
                    t          |                    |j                  S r   )r   rA   rh   rn   rl   r   r   viewrw   r   r>   )r   srcdst_typesrc_element_typedst_element_typer   s         r   	cast_implzInterpreterBuilder.cast_impl  s    9+#?++0@BJ0N0N
**/?2;/N/N!#(,<>NPTUUZZ[hiq[r[rssDho666h0G0G H H(/ZZZr   c                 .    |                      ||          S r   r@  r   r<  r=  s      r   <lambda>zInterpreterBuilder.<lambda>      $..h2O2O r   c                 .    |                      ||          S r   rB  rC  s      r   rD  zInterpreterBuilder.<lambda>  rE  r   c                 .    |                      ||          S r   rB  rC  s      r   rD  zInterpreterBuilder.<lambda>  rE  r   c                 .    |                      ||          S r   rB  rC  s      r   rD  zInterpreterBuilder.<lambda>  rE  r   c                 .    |                      ||          S r   rB  rC  s      r   rD  zInterpreterBuilder.<lambda>  s    sH0M0M r   c                 .    |                      ||          S r   rB  rC  s      r   rD  zInterpreterBuilder.<lambda>  rE  r   c                 .    |                      ||          S r   rB  )r   r<  r=  	is_signeds       r   rD  zInterpreterBuilder.<lambda>  s    T^^CQY=Z=Z r   c                     |j         j        }|j        }t          |j        |||                              t          |                    }t          ||j                  S r   )r   rA   r   r   r;  rw   r   )r   r<  r=  r   r>  r?  r   s          r   r   z"InterpreterBuilder.create_fp_to_fp  sV    9+#?ch(8:JMZZ__`mnv`w`wxxD(/222r   c                 v    t          |j                            t          |                    |j                  S r   )r   r   r;  rw   rA   rC  s      r   create_bitcastz!InterpreterBuilder.create_bitcast  s*    CHMM-*A*ABBHOTTTr   c                 ^    t           ||j        |j                  |j        j                  S r   r   r   r   rA   )r   lhsrhsops       r   	binary_opzInterpreterBuilder.binary_op  s'    BBsx22CI4DEEEr   c                 D    |                      ||t          j                  S r   rU  r7   addr   rR  rS  s      r   rD  zInterpreterBuilder.<lambda>  s    S"&)I)I r   c                 D    |                      ||t          j                  S r   rU  r7   multiplyrY  s      r   rD  zInterpreterBuilder.<lambda>      S"+)N)N r   c                 D    |                      ||t          j                  S r   rU  r7   dividerY  s      r   rD  zInterpreterBuilder.<lambda>  s    S"))L)L r   c                 D    |                      ||t          j                  S r   )rU  r7   	remainderrY  s      r   rD  zInterpreterBuilder.<lambda>  s    S",)O)O r   c                 D    |                      ||t          j                  S r   rU  r7   subtractrY  s      r   rD  zInterpreterBuilder.<lambda>  r]  r   c                 D    |                      ||t          j                  S r   r[  rY  s      r   rD  zInterpreterBuilder.<lambda>      sC(M(M r   c                 D    |                      ||t          j                  S r   r_  rY  s      r   rD  zInterpreterBuilder.<lambda>  s    S")1T1T r   c                 .    |                      ||          S r   create_idivrY  s      r   rD  zInterpreterBuilder.<lambda>      )9)9#s)C)C r   c                 .    |                      ||          S r   rj  rY  s      r   rD  zInterpreterBuilder.<lambda>  rl  r   c                 D    |                      ||t          j                  S r   rU  r7   fmodrY  s      r   rD  zInterpreterBuilder.<lambda>      S"')J)J r   c                 D    |                      ||t          j                  S r   ro  rY  s      r   rD  zInterpreterBuilder.<lambda>  rq  r   c                 D    |                      ||t          j                  S r   rW  rY  s      r   rD  zInterpreterBuilder.<lambda>  s    sC(H(H r   c                 D    |                      ||t          j                  S r   rd  rY  s      r   rD  zInterpreterBuilder.<lambda>  rg  r   c                 D    |                      ||t          j                  S r   )rU  r7   
left_shiftrY  s      r   rD  zInterpreterBuilder.<lambda>  s    sC(O(O r   c                 D    |                      ||t          j                  S r   )rU  r7   right_shiftrY  s      r   rD  zInterpreterBuilder.<lambda>  s    S".)Q)Q r   c                 D    |                      ||t          j                  S r   rU  r7   r   rY  s      r   rD  zInterpreterBuilder.<lambda>      $..c2:*N*N r   c                 D    |                      ||t          j                  S r   rz  rY  s      r   rD  zInterpreterBuilder.<lambda>  r{  r   c                 D    |                      ||t          j                  S r   rz  rY  s      r   rD  zInterpreterBuilder.<lambda>      T^^Cbj-Q-Q r   c                 D    |                      ||t          j                  S r   rz  rY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3RZ,P,P r   c                 D    |                      ||t          j                  S r   rU  r7   r   rY  s      r   rD  zInterpreterBuilder.<lambda>  r{  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r{  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r~  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   rU  r7   
less_equalrY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3R],S,S r   c                 D    |                      ||t          j                  S r   rU  r7   lessrY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3RW,M,M r   c                 D    |                      ||t          j                  S r   rU  r7   greater_equalrY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3REU,V,V r   c                 D    |                      ||t          j                  S r   rU  r7   greaterrY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   rU  r7   equalrY  s      r   rD  zInterpreterBuilder.<lambda>  s    4>>#sBH+M+M r   c                 D    |                      ||t          j                  S r   rU  r7   	not_equalrY  s      r   rD  zInterpreterBuilder.<lambda>  s    4>>#sBL+Q+Q r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3RX,N,N r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>      DNN3R\,R,R r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   r  rY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   )rU  r7   bitwise_andrY  s      r   rD  zInterpreterBuilder.<lambda>      sC(P(P r   c                 D    |                      ||t          j                  S r   )rU  r7   bitwise_xorrY  s      r   rD  zInterpreterBuilder.<lambda>  r  r   c                 D    |                      ||t          j                  S r   )rU  r7   
bitwise_orrY  s      r   rD  zInterpreterBuilder.<lambda>  s    t~~c3'N'N r   c                     t          |j        t          j        |j        |j                  z
  |j        z  |j        j                  S r   )r   r   r7   rp  r   rA   rY  s      r   rk  zInterpreterBuilder.create_idiv  s9     SX#((C(CCPRUR[Rbcccr   c                 $   t          |j        j                  }t          |j        j                  }|j                            |          |_        |j                            |          |_        |                     ||t
          j                  S r   )re   r   r   r>   rU  r7   rx  )r   rR  rS  	lhs_dtype	rhs_dtypes        r   create_ashrzInterpreterBuilder.create_ashr  sf    (88	(88	8??9--8??9--~~c3777r   c                    |j         j        }|t          j        k    s|t          j        k    r2t          t          |j         |j                   |j        j                  S t          t          d|j	        dz  dz             }|j         
                    |          }|j         
                    |          }t          j        ||          |j	        dz  z	  }t          |
                    |          |j        j                  S )Nry   r4   r   )r   r   r7   rd   r?   r   np_umulhi_u64rA   rz   itemsizer>   r\  )r   rR  rS  r   compute_dtypelhs_datarhs_dataret_datas           r   create_umulhiz InterpreterBuilder.create_umulhi  s    BH 2 2ch A A39CSTTT#B(Gu~/AA/E(G(GHHMx}55Hx}55H{8X665>A;MNH 6 6	8HIIIr   c                 j    t           ||j        |j        |j                  |j        j                  S r   rQ  )r   rR  rS  r+  rT  s        r   
ternary_opzInterpreterBuilder.ternary_op  s+    BBsx5:>>@RSSSr   c                 F    |                      |||t          j                  S r   )r  r7   clip)r   arglohipropagate_nanss        r   rD  zInterpreterBuilder.<lambda>  s    doocSUWY[][b>c>c r   c                 F    |                      |||t          j                  S r   )r  r7   where)r   condrR  rS  s       r   rD  zInterpreterBuilder.<lambda>  s    sCQSQY1Z1Z r   c                 `    t          |j        |j        z  |j        z   |j        j                  S r   rQ  r   s       r   
create_fmazInterpreterBuilder.create_fma  s%    AFQVOaf4agnEEEr   c                 R    t           ||j                  |j        j                  S r   rQ  )r   r  rT  s      r   unary_opzInterpreterBuilder.unary_op  s!    BBsxLL#)*:;;;r   c                     |j         }|j        dz
  }t          t          d|j                   }|j                            |          }d|z  dz
  }||z                      t          |                    }t          ||j         j                  S )Nr   ry   )	r   r6   rz   r7   r   r;  rw   r   rA   )r   r  rC   mask_bitwidthnp_uint_dtyper   r*  r7  s           r   create_fabszInterpreterBuilder.create_fabs  s    9 3a7$H8+F$H$HIIx}}]++]"a'd{  x!8!899C!1222r   c                 B    |                      |t          j                  S r   )r  r7   cosr   r  s     r   rD  zInterpreterBuilder.<lambda>      4==bf#=#= r   c                 B    |                      |t          j                  S r   )r  r7   expr  s     r   rD  zInterpreterBuilder.<lambda>  r  r   c                 B    |                      |t          j                  S r   )r  r7   exp2r  s     r   rD  zInterpreterBuilder.<lambda>      DMM#rw$?$? r   c                 B    |                      |t          j                  S r   )r  r7   absr  s     r   rD  zInterpreterBuilder.<lambda>  s    DMM#rv$>$> r   c                 B    |                      |t          j                  S r   )r  r7   floorr  s     r   rD  zInterpreterBuilder.<lambda>  s    T]]3%A%A r   c                 B    |                      |t          j                  S r   )r  r7   ceilr  s     r   rD  zInterpreterBuilder.<lambda>  r  r   c                 B    |                      |t          j                  S r   )r  r7   logr  s     r   rD  zInterpreterBuilder.<lambda>  r  r   c                 B    |                      |t          j                  S r   )r  r7   log2r  s     r   rD  zInterpreterBuilder.<lambda>  r  r   c                 B    |                      |t          j                  S r   r  r7   sqrtr  s     r   rD  zInterpreterBuilder.<lambda>	  s    DMM#rw,G,G r   c                 B    |                      |t          j                  S r   r  r  s     r   rD  zInterpreterBuilder.<lambda>
  r  r   c                 B    |                      |t          j                  S r   )r  r7   sinr  s     r   rD  zInterpreterBuilder.<lambda>  r  r   c                     |j         j        t          j        k    rt	          |j                   nt          |j                   }t          ||j        j                  S r   )r   r   r7   rl   np_erf_fp32np_erf_fp64r   rA   )r   r  r7  s      r   
create_erfzInterpreterBuilder.create_erf  sG    '*x~'C'Ck#(###UXU]I^I^C!1222r   c                 j    t          dt          j        |j                  z  |j        j                  S )Nr   )r   r7   r  r   r   rA   r  s     r   create_rsqrtzInterpreterBuilder.create_rsqrt  s(    A 1 11393CDDDr   c                 f    t          |j                            |          |j        j                  S r   )r   r   r=   r   rA   )r   r  r.   allow_reorders       r   rD  zInterpreterBuilder.<lambda>  s(    \#(JZJZ[`JaJacfclcs=t=t r   c                 f    t          t          j        |j        |          |j        j                  S r   )r   r7   	transposer   r   rA   )r   r  perms      r   create_transzInterpreterBuilder.create_trans  s%    BL488#):JKKKr   c                 <   |j         }|j         }|j        j        dk    r|j                                        s)|j        j        dk    r|j                                        r|t	          ||j        t
          j        d                               t          j                  }t	          ||j        t
          j        d                               t          j                  }t          t          j
        |||j         j                  |j         z   |j        j                  S )Nr4   r5   )r   r   r6   is_floatingr   rh   rk   r;  r7   r   matmulrA   )r   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r   
create_dotzInterpreterBuilder.create_dot  s    G&!++0C0C0E0E+G&!++0C0C0E0E+#FAGRZFFKKBJWWF#FAGRZFFKKBJWWFBIffAFLIIIAFRTUT[Tbcccr   c                 t    t          t          j        ||t          j                  t          j                  S r   )r   r7   r<   rc   rh   )r   startstops      r   create_make_rangez$InterpreterBuilder.create_make_range#  s'    BIeTBBBBHMMMr   c                 z    t          t          j        |j        |d|f          d         t          j                  S )Nr   )binsr:   )r   r7   	histogramr   rh   rc   )r   r   r  s      r   create_histogramz#InterpreterBuilder.create_histogram&  s3    BLaYOOOPQRTVT\]]]r   c                     |                                 }|j        }t          d|dz            }t          |j        ||j                            t          j                  z  z   |j                  S )Nr   r4   )	r   r6   maxr   r   r>   r7   r?   r   )r   r&  offsetrC   element_bitwidthelement_bytewidths         r   create_addptrz InterpreterBuilder.create_addptr+  se    %%''#6#3q#899CH'86;;M;Mbi;X;X'XXZ]Zcdddr   c                    |                     |          \  }}|                                }	t          |	          }
|d }n|t          j        j        k    r*t          t          j        |j	        |
          |	          }n_|t          j        j
        k    r8t          t          j        |j	        t          d          |
          |	          }nt          d|           |                     ||||||          S )Nr5   nanzunsupported padding option )rJ   r   rw   r   PADDING_OPTIONPAD_ZEROr   r7   r   r   PAD_NAN	full_likefloatr   r%  )r   r&  rB   padding_optionr4  r5  r)  rE   rF   rC   r6  r+  s               r   create_tensor_pointer_loadz-InterpreterBuilder.create_tensor_pointer_load2  s    ..~>>e&&(( **!EEs1::: ty!I!I!I8TTEEs1999 diuX!V!V!VX`aaEEK>KKLLL&&tUE>?\ghhhr   c                 d    |                     |          \  }}|                     |||||          S r   )rJ   r.  )r   r&  r$   rB   r4  r5  rE   rF   s           r   create_tensor_pointer_storez.InterpreterBuilder.create_tensor_pointer_storeA  s5    ..~>>e''eUNO\\\r   c                 f    t          t          j        |j        |          |j        j                  S r   )r   r7   expand_dimsr   r   rA   )r   r  r  s      r   create_expand_dimsz%InterpreterBuilder.create_expand_dimsE  s%    BN38T::CI<LMMMr   c                 f    t          t          j        |j        |          |j        j                  S r   )r   r7   r8   r   r   rA   r   r  r.   s      r   create_broadcastz#InterpreterBuilder.create_broadcastH  s%    BOCHe<<ci>NOOOr   c                 p    t          |j                            t          j                  |j                  S r   r   r   r>   r7   r?   rA   r   r/  r   s      r   create_int_to_ptrz$InterpreterBuilder.create_int_to_ptrK  $    CHOOBI66FFFr   c                 p    t          |j                            t          j                  |j                  S r   r"  r#  s      r   create_ptr_to_intz$InterpreterBuilder.create_ptr_to_intN  r%  r   c                 r    t          t          j        |j        |j        g          |j        j                  S r   )r   r7   concatenater   r   rA   rY  s      r   
create_catzInterpreterBuilder.create_catQ  s*    BNCHch+?@@#)BRSSSr   c                 v    t          t          j        |j        |j        gd          |j        j                  S )Nr  )r   r7   stackr   r   rA   rY  s      r   create_joinzInterpreterBuilder.create_joinT  s/    BHch%9CCCSYEUVVVr   c                     t          |j        d         |j        j                  t          |j        d         |j        j                  fS )N).r   ).r   rQ  )r   r/  s     r   create_splitzInterpreterBuilder.create_splitX  s<    SXf-sy/?@@,sxX^O_adajaqBrBrssr   c           	      d   t          |j        t          j                  rLt	          t          j        ||j        d         t          |j                            |j        j	                  S t	          t          j        ||j        t          |j                            |j        j	                  S r  )
rg   r   rh   rt   r   r7   fullr   rw   rA   r  s      r   create_splatzInterpreterBuilder.create_splat\  s    ci// 	lsx{-PSPYBZBZ [ [ []`]f]mnnnsx}SY?W?W X X XZ]ZcZjkkkr   c                     || j         vrt          d|           | j         |         }t          t          j        |j        |j        |j        |          |j        j                  S )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r2  
atomic_casr   r   rA   )r   r&  cmpr/  semscopes         r   create_atomic_casz$InterpreterBuilder.create_atomic_casb  sc    d444:S::;;;,S1L3CHchRUVVX[XaXhiiir   c           	          || j         vrt          d|           || j        vrt          d|           | j         |         }| j        |         }t          t	          j        ||j        |j        |j        |          |j        j                  S )Nzunsupported rmwOp r6  )	ir_rmw_op_to_interpreter_rmw_opr   r7  r   r2  
atomic_rmwr   r   rA   )r   rmwOpr&  r/  r*  r:  r;  s          r   create_atomic_rmwz$InterpreterBuilder.create_atomic_rmwh  s    <<<9%99:::d444:S::;;;4U;,S1L3E38SXtyZ]^^`c`i`pqqqr   c                      t          d          )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   libNamelibPathsymbolargListretTypeisPures          r   create_extern_elementwisez,InterpreterBuilder.create_extern_elementwiseq  s    !"XYYYr   c                      t          d          )Nz,inline_asm not supported in interpreter moderC  )r   	inlineAsmconstraintsvaluesr  rJ  packs          r   create_inline_asmz$InterpreterBuilder.create_inline_asmt  s    !"PQQQr   c                    d| j         d          d| j         d          d| j         d          d}|r|d| z  }|rt          j        dd	 i
           |D ]}t          |d|j         z              |rt          j        d 
           d S d S )N(r   z, r   r   ) r   c                     d| dS )N0x02xr)   r   s    r   rD  z1InterpreterBuilder.create_print.<locals>.<lambda>}  s    LLLL r   )	formatter)r   r7   set_printoptionsprintr   )r   prefixhexrO  msgr$   s         r   create_printzInterpreterBuilder.create_printw  s    M$-"MMdmA&6MM$-:JMMM 	 <v<<C 	K52H2H*IJJJJ 	* 	*E#(EJ((()))) 	0$//////	0 	0r   c                 4    |sJ | d| d| d|             d S )Nz in :r)   )r   	conditionmessagefileNamefuncNamelineNos         r   create_assertz InterpreterBuilder.create_assert  s:    HHWHH(HHXHHHHHHHHHr   c                     d S r   r)   r   s    r   create_barrierz!InterpreterBuilder.create_barrier  s    r   c                 B    d |D             }t          ||||||          S )Nc                 6    g | ]}|                                 S r)   r!   .0r  s     r   
<listcomp>z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>  s     <<<&v||~~<<<r   )r+   )r   r-   r.   r/   r0   r1   r2   new_offsetss           r   create_make_block_ptrz(InterpreterBuilder.create_make_block_ptr  s.    <<G<<<!$w\SXYYYr   c                 t   t          |j                  t          |          k    rt          d          d |j        D             }t          |j        |j        |j        ||j        |j                  }t          t          |                    D ](}|j        |         xj
        ||         j
        z  c_
        )|S )Nz len(ptr.offsets) != len(offsets)c                 6    g | ]}|                                 S r)   rl  rm  s     r   ro  z5InterpreterBuilder.create_advance.<locals>.<listcomp>  s     @@@&v||~~@@@r   )r;   r0   r   r+   r-   r.   r/   r1   r2   r:   r   )r   r&  r0   rp  r7  r   s         r   create_advancez!InterpreterBuilder.create_advance  s    s{s7||++?@@@@@CK@@@ 39ck;PSP`bebklls7||$$ 	3 	3AKN71:?2
r   c                     t          |          }d|j        v r*t          t          j        dd|          |j                  S t          d|           )Nr\   r   r,  r5   zunsupported type )rw   namer   r7   r3  rA   	TypeError)r   r  np_types      r   get_all_ones_valuez%InterpreterBuilder.get_all_ones_value  sV    %%GL  2W = = =t{KKK666777r   r   N)r&   r'   r(   r   MEM_SEMANTICACQUIREr2  RELEASERELAXEDACQUIRE_RELEASEr7  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr>  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r	  r  r  r  r  r  r  r  r  r   r"  r,  r0  r%  r.  r@  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   rO  rU  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orrk  r  r  r  create_clampfcreate_selectr  r  r  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr  r  create_reshaper  r  r  r	  r  r  r  r  r   r$  r'  r*  r/  r1  r4  r<  rA  rK  rQ  r_  rg  ri  rq  rt  ry  r)   r   r   r   r      sy        ,";"C ,";"C ,";"C(,*C*S	! 	<.2L/4<.2L/4<.2L/4<.2,-0<.2L/4'#X X X X" " "% % %
                                  3 3 3+ + +H H HI I IG G GK K KI I IK K KI I IK K KI I IM M MM M MM M ML L LW W W
W W WN N N
D D D+ + +D D D[ [ [ POOOOOOOOOOOMMMOOOZZO3 3 3U U UF F F JIKNNKLLKOOKNNKMMJTTCCKCCKJJKJJKHHJMMJOOJQQKNNLNNLQQOPPNNNLNNLQQOPPNSSNMMNVVNPPNSSNMMNVVNPPNMMMQQMMMNPPNSSNVVNNNNRRNMMNPPNSSNVVNNNNRRNPPJPPJNNId d d8 8 8	J 	J 	JT T T dcMZZMF F F< < <3 3 3 >=J==J??K>>KAAL??K==J??KGG??K==J3 3 3E E E utNL L Ld d dN N N^ ^ ^
e e ei i i] ] ]N N NP P PG G GG G GT T TW W Wt t tl l lj j jr r rZ Z ZR R R
0 
0 
0I I I  Z Z Z
  8 8 8 8 8r   r   c                 :    |dfd
}t          | ||           d S )N)memberc                 T     | |i d |                                 D             diS )Nc                 &    i | ]\  }}|d k    ||S )r   r)   rn  kvs      r   
<dictcomp>z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>  s7     AU AU AUEIQDEOO BCADSOOr   r   )items)r  argskwargsbuilders      r   rD  z_patch_attr.<locals>.<lambda>  sd     :hAU AUMS\\^^AU AU AU:h :h `g:h :h :h r   )setattr)objrv  r  r  
new_members      ` r   _patch_attrr    sF    &, i i i i i iJ Cz"""""r   c                     t          j        |           D ]6\  }}t          j                            |          rt          | |||           7d S r   )inspect
getmembersrh   core
is_builtinr  )pkgr  rv  r  s       r   _patch_builtinr    sW    *3// 4 4f7f%% 	4T673334 4r   c                     d d }d | _         fd| _        d | _        d | _        t	          |          | _        d S )Nc                 R    | j         j        }|j        dk    rt          |          ndS )Nr   T)r   r   sizer   )r   r   s     r   	_get_boolz%_patch_lang_tensor.<locals>._get_bool  s(    { "Y!^^tDzzz5r   c                     t           j                            t          t	          j        | j        j                  | j        j                  | j        j	                  S r   )
rh   r  r   r   r7   r  r   r   r   rA   r   s    r   _get_transposez*_patch_lang_tensor.<locals>._get_transpose  s=    w~~l2<8H+I+I4;K\]]_c_i_pqqqr   c                 4    t          | j        j                  S r   )r\   r   r   r   s    r   rD  z$_patch_lang_tensor.<locals>.<lambda>  s    C(8$9$9 r   c                      |           S r   r)   )r   r  s    r   rD  z$_patch_lang_tensor.<locals>.<lambda>  s    99T?? r   c                 4    t          | j        j                  S r   )reprr   r   r   s    r   rD  z$_patch_lang_tensor.<locals>.<lambda>  s    4(8#9#9 r   c                 4    t          | j        j                  S r   )r[   r   r   r   s    r   rD  z$_patch_lang_tensor.<locals>.<lambda>  s    #dk&6"7"7 r   )	__index__r   __repr____str__propertyT)r   r  r  s     @r   _patch_lang_tensorr    sh    6 6 6r r r :9F2222FO99FO77FN''FHHHr   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )ReduceScanOpIneterfacec                 "    || _         || _        d S r   )r  
combine_fn)r   r  r  s      r   r   zReduceScanOpIneterface.__init__  s    	$r   c                 ^    |(|t          |          k    rt          d| d|           d S d S )Nzaxis z out of bounds for shape )r;   r   )r   r.   r  s      r   
check_axisz!ReduceScanOpIneterface.check_axis  sE    E

 2 2KTKKEKKLLL  2 2r   c                     |D ]`}t          |t          j        j                  st	          dt          |                     |                     |j        | j                   ad S )Nzinput must be a tensor, got )	rg   rh   r  r   r   r  r  r.   r  )r   r   r  s      r   check_tensorz#ReduceScanOpIneterface.check_tensor  sj     	2 	2Cc27>22 M !KS		!K!KLLLOOCIty1111	2 	2r   c                    t          |d          r"|j        rt          j        ||j                  }n&t	          j        |gt          |                    }|}t          j                            t          ||j
                  |          S )Nr.   r5   )r   r.   rh   rt   r7   r   rw   r  r   r   rA   )r   r7  r   ret_types       r   	to_tensorz ReduceScanOpIneterface.to_tensor  sw    3   	SY 	}UCI66HH(C5e(<(<===CHw~~l3==xHHHr   c                     t          |t                    s|f}|                     |           |                     |          S r   )rg   tupler   
apply_implr   r   s     r   applyzReduceScanOpIneterface.apply  sA    %'' 	IE%   u%%%r   c                      t          d          )Nzapply_impl not implementedrC  r  s     r   r  z!ReduceScanOpIneterface.apply_impl  s    !">???r   N)	r&   r'   r(   r   r  r   r  r  r  r)   r   r   r  r    sy        % % %M M M2 2 2I I I& & &@ @ @ @ @r   r  c                   >     e Zd Z fdZd Zd ZddZd Zd Z xZ	S )		ReduceOpsc                 Z    t                                          ||           || _        d S r   )superr   	keep_dims)r   r  r  r  	__class__s       r   r   zReduceOps.__init__  s(    z***"r   c                     g }|D ]f}||                     |           d}|                     |                     |j        j                                        |j                             gt          |          |fS )Nr   )appendr  r   r   flattenr   r  )r   r   r  r7  r   s        r   unravelzReduceOps.unravel  s     	S 	SD

4    

4>>$+*:*B*B*D*DdjQQRRRRSzz4r   c                 p     j         }                      j                   \  }g }g }d         j        j        j        }|d|         ||dz   d          z   }D ]Y}|                    |j        j                   |                    t          j        ||j        j        j                             Zt          |d         j
                  D ]}	t          j        |	|          d|         |dz   d          z   t           fdt          |          D                       }
|         dk    rMt          t          |                    D ]/}|
|         j        j                                        ||         <   0t           fdt          |          D                       }  j        j        g ||
R  }t%          |t                    s|fn|}t          t          |                    D ]\}t%          ||         t&          j        j                  r$||         j        j                                        n||         ||         <   ]g }t          |          D ]\  }	} j        rM|t          j        ||          }nKt          t          |                    D ]}t          j        |d          }n||                                }|                                         ||	         j                             t          |          dk    r|d         nt          |          S )Nr   r   r5   c              3   l   K   | ].\  }}                     |         |         j                  V  /d S r   r  r   )rn  iir  r   input_indexr   s      r   	<genexpr>z+ReduceOps.generic_reduce.<locals>.<genexpr>  sB      ssTYTVXYq~uRy O Ossssssr   c              3   l   K   | ].\  }}                     |         |         j                  V  /d S r   r  )rn  oior   output_indexr   s      r   r  z+ReduceOps.generic_reduce.<locals>.<genexpr>	  sB      !w!wW\WY[\$..<%)/"R"R!w!w!w!w!w!wr   )r  r  r   r   r.   r  r7   zerosr   r:   r  unravel_indexr  	enumerater;   itemr  fnrg   rh   r  r   r  r  r  )r   r   original_axisr  
input_dataoutput_datainput_shapeoutput_shaper  r   input_tuplej	acc_tuplecombine_fn_retr7  r   _r  r  s   ``               @@r   generic_reducezReduceOps.generic_reduce  sb   	ll5$)44t
Aho*0"1T6*[-CC 	T 	TCcjo...rxCJO<QRRRSSSSz!})** 	H 	HA*1k::K&qv.TAXYY1GGLssssss]fgq]r]rsssssK4 A%%s;//00 U UA3>q>3H3M3R3R3T3TKN<00U "!w!w!w!w!w!w`iju`v`v!w!w!www	!3!3!MY!M!M!M!M6@QV6W6Wk^..]k	s;//00 H HAV`!!bgnW6 W6 4H9Q<3F3K3P3P3R3R3R;DQ<  N<00H  -- 	= 	=GAt~ 	# ,>$55DD"3{#3#344 7 7!~dA667 &yy{{JJt~~dE!HN;;<<<<SQs1vvE#JJ6r   Nc                    t          |t                    r|d         n|}d }d }|r;|                      ||j        j        | j        | j                  |j                  }|r@|                      ||j        j        | j        | j                  t          j	                  }||||fS ||S ||S t          d          )Nr   r  keepdimsz-val_reduce_op and idx_reduce_op are both None)rg   r  r  r   r   r  r  r   rh   rc   r   )r   r   val_reduce_opidx_reduce_opr/  idxs         r   min_maxzReduceOps.min_max  s    &ue44?a% 	y..u|/@ty[_[i!j!j!jlqlwxxC 	v..u|/@ty[_[i!j!j!jlnltuuC?s8O_J_JLMMMr   c                     |                      t          j        |j        j        | j        | j                  |j                  S )Nr/  )r  r7   sumr   r   r  r  r   r  s     r   r6  zReduceOps.sum1  s6    ~~bfU\%6TYQUQ_```bgbmnnnr   c                    | j         t          j        j        k    r2|                     |d         t
          j        t
          j                  S | j         t          j        j        k    r2|                     |d         t
          j	        t
          j
                  S | j         t          j        j        k    r(|                     |d         t
          j	        d           S | j         t          j        j        k    r(|                     |d         t
          j        d           S | j         t          j        j        k    r|                     |d                   S |                     |          S )Nr   )r1  r2  )r  rh   standard_argmin_combine_tie_break_leftr4  r7   minargmin_argmax_combine_tie_break_leftr  argmax_elementwise_max_elementwise_min_sum_combiner6  r-  r  s     r   r  zReduceOps.apply_impl4  s   ?bkHHH<<abi<XXX_ JJJ<<abi<XXX_ <<<<<ad<SSS_ <<<<<ad<SSS_ 88888E!H%%% &&u---r   r   )
r&   r'   r(   r   r  r-  r4  r6  r  __classcell__r  s   @r   r  r    s        # # # # #     )7 )7 )7VN N N N$o o o. . . . . . .r   r  c                   6     e Zd Z fdZd Zd Zd Zd Z xZS )ScanOpsc                 Z    t                                          ||           || _        d S r   )r  r   reverse)r   r  r  rF  r  s       r   r   zScanOps.__init__F  s(    z***r   c                     |                      t          j        |j        j        | j                  |j                  gS Nr-  r5   )r  r7   cumsumr   r   r  r   r  s     r   rI  zScanOps.cumsumJ  s5    ry):KKKSXS^__``r   c                     |                      t          j        |j        j        | j                  |j                  gS rH  )r  r7   cumprodr   r   r  r   r  s     r   rK  zScanOps.cumprodM  s5    rz%,*;$)LLLTYT_``aar   c           	          g }g }d         j         j        j        }D ]Y}|                    |j         j                   |                    t	          j        ||j         j        j                             Zt          |d         j                  D ]}t	          j	        ||          t           fdt          |          D                       } j                 dk    rMt          t          |                    D ]/}||         j         j                                        ||         <   0t           fdt          t                              D                       t           fdt          |          D                       }	  j        j        g |	|R  }
t#          |
t                    s|
fn|
}	t          t          |                    D ]\}t#          |	|         t$          j        j                  r$|	|         j         j                                        n|	|         ||         <   ]g }t          |          D ]9\  }}|                                         ||         j                             :|S )Nr   r5   c              3   l   K   | ].\  }}                     |         |         j                  V  /d S r   r  )rn  r  r  indexr   r   s      r   r  z'ScanOps.generic_scan.<locals>.<genexpr>[  s?      ffur1%%)/BBffffffr   c              3   T   K   | ]"}|j         k    r|         d z
  n|         V  #dS )r   Nr-  )rn  r   rN  r   s     r   r  z'ScanOps.generic_scan.<locals>.<genexpr>a  s>      "k"kTU1	>>58a<<uQx"k"k"k"k"k"kr   c              3   l   K   | ].\  }}                     |         |         j                  V  /d S r   r  )rn  r  r  r   
prev_indexr   s      r   r  z'ScanOps.generic_scan.<locals>.<genexpr>b  sB      !u!uUZUWYZ$..:b	"P"P!u!u!u!u!u!ur   )r   r   r.   r  r7   r  r   r:   r  r  r  r   r  r;   r!  r  r"  rg   rh   r  r   r  )r   r   r$  r%  r.   r  r   r   r)  r*  r+  r7  rN  rQ  s   ``          @@r   generic_scanzScanOps.generic_scanP  s   
a$* 	M 	MCcjo...rxSZ_5JKKKLLLLz!})** 	H 	HA$Q..EffffffPYZdPePefffffDTY1$$s;//00 G GA,0GN,?,D,D,F,FKN5))G #"k"k"k"k"kY^_bch_i_iYjYj"k"k"kkk
!!u!u!u!u!u!u^ghs^t^t!u!u!uuu	!3!3!FY!F!F!F!F6@QV6W6Wk^..]k	s;//00 H HAOY!!bgnP6 P6 -HIaL,?,D,I,I,K,K,K;DQ<  N5))H  -- 	= 	=GAtJJt~~dE!HN;;<<<<
r   c           	         g }| j         rW|D ]S}|                    |                     t          j        |j        j        | j                  |j                             Tn|}| j	        t          j        j        k    r|                     |d                   }nK| j	        t          j        j        k    r|                     |d                   }n|                     |          }| j         r4|D ]1}t          j        |j        j        | j                  |j        _        2t#          |          dk    r|d         pt%          |          S )Nr-  r   r   )rF  r  r  r7   flipr   r   r  r   r  rh   r8  r@  rI  _prod_combinerK  rR  r;   r  )r   r   	new_inputr  r7  s        r   r  zScanOps.apply_impln  s4   	< 	 f f  
di0X0X0XZ]Zc!d!deeeef I?bk666++il++CC_ 999,,y|,,CC ##I..C< 	K K K"$'#*/	"J"J"J
3xx1}'Q55::5r   )	r&   r'   r(   r   rI  rK  rR  r  rA  rB  s   @r   rD  rD  D  sz            a a ab b b  <6 6 6 6 6 6 6r   rD  c                      dd} dd}| t           _        |t           _        | t           j        _        |t           j        _        d S )NFc                 J    t          |||                              |           S r   )r  r  )r   r  r  r  r  s        r   _new_reducez'_patch_reduce_scan.<locals>._new_reduce  s"    z955;;EBBBr   c                 J    t          |||                              |           S r   )rD  r  )r   r  r  rF  r  s        r   	_new_scanz%_patch_reduce_scan.<locals>._new_scan  s"    tZ1177>>>r   )F)rh   reduceassociative_scanr  )rY  r[  s     r   _patch_reduce_scanr^    sX    C C C C? ? ? ? BI#B BGN(BGr   c                    d }d
d}dd}d }|| _         || _        || _        t          | _        || j        _        t          |d          | _        t          |d          | _	        t          |d	          | _
        t                       d S )Nc                 F   | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         d	k    r|	                                S | j         d
k    r|
                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S t%          d|  d          )Nvoidrj   r_   r^   ra   r`   rc   rb   rd   r?   fp8e5fp8e4nvfp8e4b15fp16bf16fp32fp64zfail to convert z to ir type)rv  get_void_tyget_int1_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r  s     r   
_new_to_irz$_patch_lang_core.<locals>._new_to_ir  s+   9&&(((Y&  &&(((Y&  &&(((Y'!!'')))Y'!!'')))Y(""((***Y'!!'')))Y(""((***Y'!!'')))Y(""((***Y'!!'')))Y)##))+++Y*$$**,,,Y&  &&(((Y&  &&(((Y&  '')))Y&  ((***=D===>>>r   c                 B    |d}|d| }}n| |}}t          |||          S )Nr   r   )r:   )arg1arg2stepr  r  ends         r   
_new_rangez$_patch_lang_core.<locals>._new_range  s7    <D<D3EEt3EUC&&&r    c                     | s
J |            d S r   r)   )r  r^  s     r   _new_static_assertz,_patch_lang_core.<locals>._new_static_assert  s    Sr   c                 X   t          | t          j                  s| S t          |t          t          f          s|gn|}d |D             }t          |          t          dt          | j                            k    rt          d|           | j	        
                    ||           | S )Nc                 T    g | ]%}t          |t          j                  r|j        n|&S r)   )rg   rh   	constexprr$   rn  r  s     r   ro  z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>  s/    PPPAZ2<88?!''aPPPr   r   z$len(values) != len(input.shape) for )rg   rh   r   listr  r;   r  r.   r   r   r%   )r   rO  rv  s      r   	_set_attrz#_patch_lang_core.<locals>._set_attr  s    %++ 	L!+FT5M!B!BN&PPPPPv;;#aU[!1!12222JDJJKKKdF+++r   ztt.divisiblityrv  ztt.contiguityztt.constancy)NN)rr  )r:   static_rangestatic_assertr[  static_printr   to_irr   multiple_ofmax_contiguousmax_constancyr^  )langrk  rq  rt  rz  s        r   _patch_lang_corer    s    $? $? $?P' ' ' '   
 
 
 DJ"D+DD!DJy/?@@@D!)/BBBD @@@Dr   c                    d | j                                         D             }t          |          dk    s
J d            t          |d         t                     t          |d         j        t                     |d         t          k    r t          |d         j        t                     t          |d         j                   t          |d                    d S )Nc                 B    g | ]\  }}|t           t           j        fv |S r)   )rh   r  )rn  r,  r$   s      r   ro  z_patch_lang.<locals>.<listcomp>  s,    SSShaEb"']<R<RE<R<R<Rr   r   z:triton.language must be visible from within jit'd functionr   )
__globals__r  r;   r  interpreter_builderr   rh   r   r  r  )r"  r  s     r   _patch_langr    s    SS"."6"6"8"8SSSDt99>>>W>>>47/00047>#6777Aw"}}tAw|%8999tAw~&&&T!Wr   c                    t          | t                    r.t          j        t          j        j        j                            t          j        j        j        	                    |                               }t          j        }d| cxk    rdk     rn nt          j        }nid| cxk    rdk     rn nt          j        }nLd| cxk    rdk     rn nt          j        }n/d| cxk    rdk     rn nt          j        }nt          d|            t!          t          j        | g|          |          }t          j        ||          S t'          | d	          rt          j        t          j        j        j                            t          j        j        j        	                    |                               }t!          t          j        |                                 gt          j                  |          }t          j        ||          S | S )
Ni   l        l        l         l            l            zUnsupported integer value r5   data_ptr)rg   r\   rh   	str_to_tytritonruntimejitJITFunction_type_of_key_ofr7   rc   rb   rd   r?   r   r   r   r   r   r  )r  tyr   r   s       r   _implicit_cvtr    s   #s %\&.,8AA&.BTB`BhBhilBmBmnnooS    5     HEEc!!!!E!!!!!IEEs""""U"""""HEEc!!!!E!!!!!IEE?#??@@@bhuE:::B??y$$$sJ %\&.,8AA&.BTB`BhBhilBmBmnnoobh'7ryIII2NNy$$$Jr   )	num_warps
num_stagesnum_ctasenable_fp_fusiongridmaxnregc                   &    e Zd Zd Zd Zd Zd ZdS )GridExecutorc                     ddl m || _        || _        || _        fd|j                                        D             fd|D             | _        d S )Nr   )_normalize_tyc                 .    i | ]\  }}| |          S r)   r)   )rn  rv  r  r  s      r   r  z)GridExecutor.__init__.<locals>.<dictcomp>  s)    ^^^xtR4r!2!2^^^r   c                 F    g | ]}                     |          d k    |S )rw  )get)rn  rv  rZ   s     r   ro  z)GridExecutor.__init__.<locals>.<listcomp>  s2    bbbD9L9LT9R9RVa9a9a49a9a9ar   )r  r  r"  	arg_namesr  rZ   r  
constexprs)r   r"  r  r  rZ   r  s       @@r   r   zGridExecutor.__init__  sr    &&&&&&"	^^^^2CUC[C[C]C]^^^bbbbIbbbr   c                 D   g }|D ]O}t          |d          r(|                    |                                           :|                    |           Pi }|                                D ]2\  }}t          |d          r|                                ||<   -|||<   3||fS Nr  )r   r  cpur  )r   args_devr  args_hstr  
kwargs_hstr#   r$   s           r   _init_args_hstzGridExecutor._init_args_hst  s     	% 	%CsJ'' %		****$$$$
 ,,.. 	( 	(JCuj)) ("'))++
3"'
3##r   c                    t          ||          D ]L\  }}t          |d          r7|j                            |                    |j                  j                   M|                                D ]T\  }}||         }	t          |d          r7|j                            |	                    |j                  j                   Ud S r  )zipr   r   copy_todevicer  )
r   r  r  r  r  arg_devarg_hstr#   	kwarg_dev	kwarg_hsts
             r   _restore_args_devzGridExecutor._restore_args_dev$  s     #Hh 7 7 	D 	DGWw
++ D""7::gn#=#=#BCCC %llnn 	J 	JNC"3Iy*-- J$$Y\\)2B%C%C%HIII	J 	Jr   c                     d |                                 D             }|                    dd          rd S                      ||          \  }}t           j                   t          j         j        g|R i |} fd|                                 D             }t           j                  r                     |          n j        }t          |          dk    s
J d            |ddt          |          z
  z  z   }t          j        |  	 t          |d                   D ][}t          |d	                   D ]C}t          |d
                   D ]+}	t                              |||	             j        di | ,D\n/# t          $ r"}
t          t!          |
                    |
d }
~
ww xY w                     ||||           d S )Nc                 ,    i | ]\  }}|t           v||S r)   )RESERVED_KWSr  s      r   r  z)GridExecutor.__call__.<locals>.<dictcomp>1  s(    KKK41aQl5J5J!Q5J5J5Jr   warmupFc                 L    i | ] \  }}||j         v r|nt          |          !S r)   )r  r  )rn  rv  r  r   s      r   r  z)GridExecutor.__call__.<locals>.<dictcomp>;  s:    kkkQZQUWZTT_44cc-:L:Lkkkr      z#grid must have at most 3 dimensions)r   r   r   r   r)   )r  popr  r  r"  r  getcallargscallabler  r;   r  r   r:   r   	Exceptionr   r  r  )r   r  r  r  r  r  r  r   r   r   es   `          r   __call__zGridExecutor.__call__/  s   KK6<<>>KKK::h&& 	F#228VDD*DG "47DXDDDDDkkkk^b^h^h^j^jkkk"*49"5"5Dtyy494yyA~~~D~~~eq3t99}--($//	347^^ ( (tAw ( (A"47^^ ( (+88AqAAA$(((
  	3 	3 	3"477++2	3 	x6:FFFFFs   A1F 
F1F,,F1N)r&   r'   r(   r   r  r  r  r)   r   r   r  r  	  s[        c c c$ $ $ 	J 	J 	JG G G G Gr   r  c                   8    e Zd ZddZed             Z d Zd ZdS )InterpretedFunctionr   Nc                      | _          fd}| _        t          j        |          }d |j                                        D              _        d S )Nc                  V    |d         } t          j        j        |          | i |S )Nr  r  r"  r  )r  r  r  r   s      r   runz)InterpretedFunction.__init__.<locals>.runR  s1    &>D><>>OOOOr   c                     g | ]	}|j         
S r)   r{  rx  s     r   ro  z0InterpretedFunction.__init__.<locals>.<listcomp>X  s    HHHQ!&HHHr   )r"  r  r  	signature
parametersrO  r  )r   r"  r  r  s   `   r   r   zInterpretedFunction.__init__O  si    	P 	P 	P 	P 	P %b))	HH)*>*E*E*G*GHHHr   c                     | j         j        S r   )r"  r&   r   s    r   r&   zInterpretedFunction.__name__Z  s    wr   c                 8    t          | j        | j        |          S r   r  )r   r  s     r   __getitem__zInterpretedFunction.__getitem__^  s    DGT^T:::r   c                     t          | j                   	  | j        |i |S # t          $ r"}t          t	          |                    |d }~ww xY wr   )r  r"  r  r   r  )r   r  r  r  s       r   r  zInterpretedFunction.__call__a  sc    DG	347D+F+++ 	3 	3 	3"477++2	3s   # 
AA

Arz  )r&   r'   r(   r   r  r  r  r)   r   r   r  r  M  se        	I 	I 	I 	I     X ; ; ;3 3 3 3 3r   r  )4r  typingr   r   numpyr7   r  triton.languagelanguagerh   dataclassesr   errorsr   	functoolsr   _C.libtritonr	   r2  r
   r   r   r+   rM   re   rw   r   r   r   	vectorizerl   r  rm   r  r?   r  r   r   r  r  r  r  r  rD  r^  r  r  r  r  r  r  r  r)   r   r   <module>r     s                      ! ! ! ! ! ! $ $ $ $ $ $       6 6 6 6 6 6 $ $ $ $ $ $       :       6 $+ + + + + + + +	 	 	  @=' =' ='@  
# # # bl4555bl4555Z<<<g g g g g g g gH8 H8 H8 H8 H8 H8 H8 H8V# # #4 4 4( ( ($@ @ @ @ @ @ @ @D]. ]. ]. ]. ].& ]. ]. ].@;6 ;6 ;6 ;6 ;6$ ;6 ;6 ;6|) ) ) K K K\    . )(**  ^]]AG AG AG AG AG AG AG AGH3 3 3 3 3 3 3 3 3 3r   