
    çg\                      d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlmZmZmZmZmZmZ d dlZdd	lmZ d dlZd dlZdd
lmZ ddlmZ  ed          ZdZdZej        ZddZ ddZ!d Z"ddZ#e dd            Z$d Z% G d d          Z&e&Z' G d de&          Z( G d de(          Z) G d d e&          Z* G d! d"e&          Z+ e&d#          Z, e&d$          Z- e&d%          Z. e&d&          Z/ e&d'          Z0 e&d(          Z1 e&d)          Z2 e&d*          Z3 e&d+          Z4 e&d,          Z5 e&d-          Z6 e&d.          Z7 e&d/          Z8 e&d0          Z9 e&d1          Z: e&d2          Z; e&d3          Z< e&d4          Z= e&d5          Z> e(e0          Z?dd9Z@ G d: d;          ZA G d< d=          ZB eBd           ZCd> ZD G d? d@          ZEdA ZFdB ZGe ddC            ZHe ddD            ZIe ddE            ZJdF ZKe ddG            ZLe ddH            ZMe!e ddIdJ                        ZNe!e ddIddL                        ZOe!e ddIdM                        ZPe ddO            ZQe ddP            ZRedQ             ZSe!e dddS                        ZTe!e ddIdT                        ZUe!e dNddUdV                        ZVdW ZWe!e ddX                        ZXe!e ddd\                        ZYe dddde=dfd]            ZZe 	 	 dd`            Z[e dda            Z\e ddb            Z]e!e ddc                        Z^e ddde            Z_e!e ddf                        Z`dddkZae!e  eadldmn          ddo                                    Zbe!e  eadp          ddq                                    Zce!e  eadr          dds                                    Zde!e  eadt          ddu                                    Zee!e  eadv          ddw                                    Zfe!e  eadx          ddy                                    Zge!e  eadz          dd{                                    Zhe!e  ead|          dd}                                    Zie dd~            Zje ejk        dfdd            Zle ejk        dfdd            Zme ejk        dfdd            ZndddZoed             Zpe!e dd                        Zqe dd            Zre dd            ZsddZte!e dd                        Zue!e dd                        Zve dd            Zwe dd            Zxe dd            Zye dd            Zze ddddNdddd            Z{e dd            Z|e dNddd            Z}e dd            Z~e 	 ddd            Z G d d          Z G d d          Z	 dddZe 	 ddd            Zd Zd ZdS )    )annotations)warn)contextmanager)Enum)partialwrapsN)UnionCallableListSequenceTypeVarOptional   )jit)ir   )semanticTi   __triton_builtin__fnreturnc                     t                     sJ t                      fd            }t          |t          d           |S )zMark a function as a builtin.c                 J    d|vs|d         t          d           | i |S )N_builderzdDid you forget to add @triton.jit ? (`_builder` argument must be provided outside of JIT functions.)
ValueErrorargskwargsr   s     P/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/language/core.pywrapperzbuiltin.<locals>.wrapper   sH    V##vj'9'A ` a a ar4"6"""    T)callabler   setattrTRITON_BUILTIN)r   r!   s   ` r    builtinr&      sW    B<<
2YY# # # # Y# G^T***Nr"   c                    t                     sJ t          j                   }t          |j                                        ddhz
            dk    } j        sd _         xj        d j         d|rdnd d j         d	|rd
nd d	z  c_         fd}t          |j        	                                          }|d         
                    d          |d<   |
                    |          }||_        d j         d|_        t                     rt          |t          d           t          t           j        |            S )a  Decorator that adds this free function as a member fn on class tensor.

    When called as a member function on class tensor, the first argument to `fn`
    is `self`, i.e. the tensor object.

    If there are multiple decorators on a function, you probably want this one
    to be the highest one (i.e. furthest from the function's `def`), so it's
    applied last.

    Unfortunately you still need to add a type stub to the body of class tensor
    in order for pytype to know about it.
    r   
_generatorr    zb
    This function can also be called as a member function on :py:class:`tensor`,
    as :code:`x.(z...z)` instead of
    :code:`z(xz, ...z)`.
    c                      | i |S N r   s     r    r!   z"_tensor_member_fn.<locals>.wrapperD   s    r4"6"""r"   r   selfname)
parameterszForwards to :py:func:`z` free functionT)r#   inspect	signaturelenr1   keys__doc____name__listvaluesreplace__signature__
is_builtinr$   r%   tensor)r   orig_sighas_argsr!   
new_paramsnew_sigs   `     r    _tensor_member_fnrB   *   s    B<< $$H8&++--\0JJKKaOH: 
JJ  (08uub  K  '/677B   JJ# # # # #
 h)002233JqM))v)66JqM*55G#GKr{KKKGO"~~ /...FBK)))Ir"   c                    t          |           dk    r.	 t          | d                    | d         S # t          $ r Y nw xY w| S )z7Returns x[0] if x has one element and x[0] is iterable.r   r   )r4   iter	TypeError)xs    r    _unwrap_iterablerG   V   sU    
1vv{{	1JJJQ4K 	 	 	D	 Hs   2 
??boolc                .    t          | t          d          S )z-Is this a registered triton builtin function?F)getattrr%   r   s    r    r<   r<   l   s    2~u---r"   c                "    t          | |          S r,   
_to_tensor)rF   r   s     r    	to_tensorrO   q   s    a"""r"   c                   t          | t                    r(t          |                    |           t                    S t          | t
                    rd| cxk    rdk     r+n n(t          |                    |           t                    S d| cxk    rdk     r+n n(t          |                    |           t                    S d| cxk    rdk     r+n n(t          |
                    |           t                    S d| cxk    rdk     r+n n(t          |                    |           t                    S t          d|  d          t          | t                    rd	}d
ddz  z  }t!          d         |           }|t          d          k    s|dk    s| | k    s||cxk    r|k    r+n n(t          |                    |           t$                    S t          |                    |           t(                    S t          | t*                    rt-          | j        |          S t          | t                    r| S J d|  dt1          |            d            )Ni   l        l        l         l            l            zNonrepresentable integer .g      8g   ?r      absinfg        Fzcannot convert z	 of type z
 to tensor)
isinstancerH   r=   get_int1int1int	get_int32int32
get_uint32uint32	get_int64int64
get_uint64uint64RuntimeErrorfloat__builtins__get_fp32float32get_fp64float64	constexprrN   valuetype)rF   buildermin_float32max_float32abs_xs        r    rN   rN   v   sy   !T g&&q))4000	As		 Q'++A..666a%',,Q//888q    5     '++A..666a%',,Q//888?1???@@@	Au		 !QV+U#A&&E%LL  C<<66%....;.....'**1--w777'**1--w777	Ay	!	! !'7+++	Av		 CCACCQCCCCCCr"   c                     e Zd Zg dZg dZg dZg dZdgZ G d de          Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d  Z"d! Z#e$d"             Z%e$d#             Z&e$d$             Z'e$d%             Z(e$d&             Z)d7d(Z*d7d)Z+d* Z,e-d+             Z.d8d0Z/d1 Z0d2 Z1e-d9d4            Z2d5 Z3d6S ):dtype)int8int16rZ   r^   )rW   uint8uint16r\   r`   )	fp8e4b15fp8e4nvfp8e4b8fp8e5fp8e5b16fp16bf16fp32fp64)rz   r{   r|   r}   voidc                      e Zd ZdZdZdS )dtype.SIGNEDNESSr   r   N)r7   
__module____qualname__SIGNEDUNSIGNEDr-   r"   r    
SIGNEDNESSr      s        r"   r   c                   t          |d          r|j        }|| _        |t          j        t          j        z   t          j        z   t          j        z   v s
J |            |t          j        v rQt          j        j	        | _
        t          |                    d          d                   | _        | j        | _        d S |t          j        v rQt          j        j        | _
        t          |                    d          d                   | _        | j        | _        d S |t          j        v r|dk    rd| _        d| _        d| _        d S |dk    rd| _        d| _        d	| _        d S |d
k    rd| _        d| _        d| _        d S |dk    rd| _        d| _        d| _        d S |dk    rd| _        d| _        d| _        d S |dk    rd| _        d| _        d| _        d S |dk    rd	| _        d| _        d| _        d S |dk    rd| _        d| _        d| _        d S |dk    rd| _        d| _        d| _        d S t%          d|           |dk    r	d| _        d S d S )Nri   rX   ru            rv      rw   rx   r   ry      rz   
   r{   rR   r|          r}   5   @   i  z Unsupported floating-point type r~   r   )hasattrri   r0   rp   
SINT_TYPES
UINT_TYPESFP_TYPESOTHER_TYPESr   r   int_signednessrX   splitint_bitwidthprimitive_bitwidthr   fp_mantissa_widthexponent_biasra   )r.   r0   s     r    __init__zdtype.__init__   s   4!! 	:D	u'%*::U^KeN_____ae___5###"'"2"9D #DJJu$5$5b$9 : :D&*&7D###U%%%"'"2";D #DJJu$5$5b$9 : :D&*&7D###U^##z!!)*&*+'%'""""")*&*+'%&""""")*&*+'%&""")*&*+'%'"""##)*&*+'%'""")+&*,'%'""")*&*,'%(""")+&*,'%(""")+&*,'%)""""#Ld#L#LMMMV^^&'D### ^r"   c                    d| j         v S )Nfp8r/   r.   s    r    is_fp8zdtype.is_fp8   s    	!!r"   c                    | j         dk    S )Nrv   r/   r   s    r    
is_fp8e4nvzdtype.is_fp8e4nv       yI%%r"   c                    | j         dk    S )Nrw   r/   r   s    r    
is_fp8e4b8zdtype.is_fp8e4b8   r   r"   c                    | j         dk    S )Nru   r/   r   s    r    is_fp8e4b15zdtype.is_fp8e4b15       yJ&&r"   c                    | j         dk    S )Nrx   r/   r   s    r    is_fp8e5zdtype.is_fp8e5       yG##r"   c                    | j         dk    S )Nry   r/   r   s    r    is_fp8e5b16zdtype.is_fp8e5b16   r   r"   c                    | j         dk    S )Nrz   r/   r   s    r    is_fp16zdtype.is_fp16       yF""r"   c                    | j         dk    S )Nr{   r/   r   s    r    is_bf16zdtype.is_bf16   r   r"   c                    | j         dk    S )Nr|   r/   r   s    r    is_fp32zdtype.is_fp32   r   r"   c                    | j         dk    S )Nr}   r/   r   s    r    is_fp64zdtype.is_fp64   r   r"   c                    | j         dk    S )NrW   r/   r   s    r    is_int1zdtype.is_int1   r   r"   c                    | j         dk    S )Nrq   r/   r   s    r    is_int8zdtype.is_int8   r   r"   c                    | j         dk    S )Nrr   r/   r   s    r    is_int16zdtype.is_int16   r   r"   c                    | j         dk    S )NrZ   r/   r   s    r    is_int32zdtype.is_int32  r   r"   c                    | j         dk    S )Nr^   r/   r   s    r    is_int64zdtype.is_int64  r   r"   c                    | j         dk    S )Nrs   r/   r   s    r    is_uint8zdtype.is_uint8  r   r"   c                    | j         dk    S )Nrt   r/   r   s    r    	is_uint16zdtype.is_uint16
      yH$$r"   c                    | j         dk    S )Nr\   r/   r   s    r    	is_uint32zdtype.is_uint32  r   r"   c                    | j         dk    S )Nr`   r/   r   s    r    	is_uint64zdtype.is_uint64  r   r"   c                (    | j         t          j        v S r,   )r0   rp   r   r   s    r    is_floatingzdtype.is_floating  s    yEN**r"   c                (    | j         t          j        v S r,   )r0   rp   STANDARD_FP_TYPESr   s    r    is_standard_floatingzdtype.is_standard_floating  s    yE333r"   c                (    | j         t          j        v S r,   )r0   rp   r   r   s    r    is_int_signedzdtype.is_int_signed      yE,,,r"   c                (    | j         t          j        v S r,   )r0   rp   r   r   s    r    is_int_unsignedzdtype.is_int_unsigned  r   r"   c                B    | j         t          j        t          j        z   v S r,   )r0   rp   r   r   r   s    r    is_intzdtype.is_int  s    yE,u/????r"   c                *    |                                  S r,   )r   r   s    r    is_boolzdtype.is_bool"      ||~~r"   c                l    | t           j        t           j        z   t           j        z   t           j        z   v S r,   )rp   r   r   r   r   )type_strs    r    is_dtypezdtype.is_dtype%  s(    5+e.>>ORWRccccr"   c                      t          d          )NzNot implementedra   r-   r"   r    is_voidzdtype.is_void)  s    ,---r"   c                     dS NFr-   r-   r"   r    is_blockzdtype.is_block-      ur"   c                     dS r   r-   r-   r"   r    is_ptrzdtype.is_ptr1  r   r"   c                     dS r   r-   r-   r"   r    is_constzdtype.is_const5  r   r"   otherc                P    t          |t                    sdS | j        |j        k    S r   )rU   rp   r0   r.   r   s     r    __eq__zdtype.__eq__9  s(    %'' 	5yEJ&&r"   c                .    |                      |           S r,   r   r   s     r    __ne__zdtype.__ne__>      ;;u%%%%r"   c                ,    t          | j        f          S r,   )hashr0   r   s    r    __hash__zdtype.__hash__A  s    TYM"""r"   c                    | S r,   r-   r   s    r    scalarzdtype.scalarD      r"   rk   
ir.builderr   ir.typec                   | j         dk    r|                                S | j         dk    r|                                S | j         dv r|                                S | j         dv r|                                S | j         dv r|                                S | j         dv r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         d	k    r|	                                S | j         d
k    r|
                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S t!          d|  d          )Nr~   rW   )rq   rs   )rr   rt   )rZ   r\   )r^   r`   rx   ry   rv   rw   ru   rz   r{   r|   r}   zfail to convert z to ir type)r0   get_void_tyget_int1_tyget_int8_tyget_int16_tyget_int32_tyget_int64_tyget_fp8e5_tyget_fp8e5b16_tyget_fp8e4nv_tyget_fp8e4b8_tyget_fp8e4b15_tyget_half_tyget_bf16_tyget_float_tyget_double_tyr   r.   rk   s     r    to_irzdtype.to_irH  s   9&&(((Y&  &&(((Y+++&&(((Y---'')))Y---'')))Y---'')))Y'!!'')))Y*$$**,,,Y)##))+++Y)##))+++Y*$$**,,,Y&  &&(((Y&  &&(((Y&  '')))Y&  ((***=D===>>>r"   c                    | j         S r,   r/   r   s    r    __str__zdtype.__str__i  
    yr"   c                    | j                             d          rd| j         dd          z   S | j                             d          rd| j         dd          z   S | j         S )Nfprb   r   bfbfloat)r0   
startswithr   s    r    codegen_namezdtype.codegen_namel  sd    9%% 	TYqrr]**Y!!$'' 	dim++9r"   strc                    | j         S )z"See cache_key_part() in triton.cc.r/   r   s    r    cache_key_partzdtype.cache_key_partt  s     yr"   c                0    d|                                   S )z4Output of repr needs to be an evaluatable expressionztriton.language.)r  r   s    r    __repr__zdtype.__repr__y  s    7$"3"3"5"5777r"   N)r   rp   )rk   r   r   r   r   r  )4r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r   r   r   r   r   r   propertyr   r	  r  r  r  r  r-   r"   r    rp   rp      s:       444J@@@JfffH888(K    T   5( 5( 5(n" " "& & && & &' ' '$ $ $' ' '# # ## # ## # ## # ## # ## # #$ $ $$ $ $$ $ $$ $ $% % %% % %% % %+ + +4 4 4- - -- - -@ @ @   d d \d . . \.   \   \   \' ' ' '
& & & &# # #   X? ? ? ?B        X8 8 8 8 8r"   rp   c                  X    e Zd ZdddZddZd Zd Zd ZddZddZ	e
d             ZdS )pointer_typer   
element_tyrp   address_spacerX   c                    t          |t                    s%t          dt          |          j         d          || _        || _        d| d| _        d S )Nzelement_ty is a rQ   zpointer<>)rU   rp   rE   rj   r7   r  r  r0   )r.   r  r  s      r    r   zpointer_type.__init__  s_    *e,, 	MKtJ/?/?/HKKKLLL$*,z,,,			r"   rk   r   r   ir.pointer_typec                ^    |                     | j                            |          d          S )Nr   )
get_ptr_tyr  r	  r  s     r    r	  zpointer_type.to_ir  s(    !!$/"7"7"@"@!DDDr"   c                    | j         S r,   r/   r   s    r    r  zpointer_type.__str__  r  r"   c                *    |                                  S r,   r  r   s    r    r  zpointer_type.__repr__  r   r"   c                    dS NTr-   r   s    r    r   zpointer_type.is_ptr      tr"   r   rH   c                p    t          |t                    sdS | j        |j        k    o| j        |j        k    S r   )rU   r  r  r  r   s     r    r   zpointer_type.__eq__  s9    %.. 	5%"22`t7IUM`7``r"   c                .    |                      |           S r,   r   r   s     r    r   zpointer_type.__ne__  r   r"   c                    | S r,   r-   r   s    r    r   zpointer_type.scalar  r   r"   Nr   r  rp   r  rX   )rk   r   r   r!  )r   r  r   rH   )r7   r   r   r   r	  r  r  r   r   r   r  r   r-   r"   r    r  r    s        - - - - -E E E E      a a a a
& & & &   X  r"   r  c                  6     e Zd Zdd fdZd Zd ZddZ xZS )const_pointer_typer   r  rp   r  rX   c                L    t                                          ||           d S r,   )superr   )r.   r  r  	__class__s      r    r   zconst_pointer_type.__init__  s#    ]33333r"   c                    d| j          dS )Nzconst_pointer<r   r  r   s    r    r  zconst_pointer_type.__str__  s    22222r"   c                    dS r(  r-   r   s    r    r   zconst_pointer_type.is_const  r)  r"   r   rH   c                p    t          |t                    sdS | j        |j        k    o| j        |j        k    S r   )rU   r0  r  r  r   s     r    r   zconst_pointer_type.__eq__  s:    %!344 	5%"22`t7IUM`7``r"   r-  r.  r   rH   )r7   r   r   r   r  r   r   __classcell__)r3  s   @r    r0  r0    s|        4 4 4 4 4 4 43 3 3  a a a a a a a ar"   r0  c                  ^    e Zd ZddZdd
Zd Zd Zd ZddZddZ	ddZ
ed             ZdS )
block_typer  rp   shaper   c                d   || _         |st          d          t          |d         t                    rd |D             }|| _        d| _        | j        D ]}| xj        |z  c_        | j        t          k    r t          d| j         dt           d          d| j         d	| j          d
| _        d S )Nz0d block_type is forbiddenr   c                    g | ]	}|j         
S r-   ri   .0ss     r    
<listcomp>z'block_type.__init__.<locals>.<listcomp>  s    ,,,QW,,,r"   r   znumel (z') exceeds triton maximum tensor numel ()<, r   )	r  rE   rU   rh   r<  numelTRITON_MAX_TENSOR_NUMELr   r0   )r.   r  r<  rB  s       r    r   zblock_type.__init__  s    $  	:8999eAh	** 	-,,e,,,E

 	 	AJJ!OJJJ:///ttzttZqtttuuu8
88do888			r"   rk   r   r   ir.block_typec                h    |                     | j                            |          | j                  S r,   )get_block_tyr  r	  r<  r  s     r    r	  zblock_type.to_ir  s*    ##DO$9$9'$B$BDJOOOr"   c                    | j         S r,   r/   r   s    r    r  zblock_type.__str__  r  r"   c                *    |                                  S r,   r&  r   s    r    r  zblock_type.__repr__  r   r"   c                    dS r(  r-   r   s    r    r   zblock_type.is_block  r)  r"   	List[int]c                    | j         S r,   )r<  r   s    r    get_block_shapeszblock_type.get_block_shapes  
    zr"   r   rH   c                p    t          |t                    sdS | j        |j        k    o| j        |j        k    S r   )rU   r;  r  r<  r   s     r    r   zblock_type.__eq__  s7    %,, 	5%"22PtzU[7PPr"   c                .    |                      |           S r,   r   r   s     r    r   zblock_type.__ne__  r   r"   c                    | j         S r,   r5  r   s    r    r   zblock_type.scalar  s
    r"   N)r  rp   r<  r   )rk   r   r   rI  )r   rO  )r   r;  r   rH   )r7   r   r   r   r	  r  r  r   rQ  r   r   r  r   r-   r"   r    r;  r;    s        9 9 9 9*P P P P         Q Q Q Q
& & & &   X  r"   r;  c                  $    e Zd ZddZd Zdd
ZdS )function_type	ret_typesList[dtype]param_typesr   Nonec                "    || _         || _        d S r,   )rX  rZ  )r.   rX  rZ  s      r    r   zfunction_type.__init__  s    "&r"   c                &    d| j          d| j         S )Nzfn (z) -> )rZ  rX  r   s    r    r  zfunction_type.__str__  s    =d&==T^===r"   rk   r   c                |    fd| j         D             }fd| j        D             }                    ||          S )Nc                :    g | ]}|                               S r-   r	  )rA  tyrk   s     r    rC  z'function_type.to_ir.<locals>.<listcomp>  s%    GGG"((7++GGGr"   c                :    g | ]}|                               S r-   r`  )rA  ret_typerk   s     r    rC  z'function_type.to_ir.<locals>.<listcomp>  s%    LLLX^^G,,LLLr"   )rZ  rX  get_function_ty)r.   rk   ir_param_typesrX  s    `  r    r	  zfunction_type.to_ir  sP    GGGGd6FGGGLLLLT^LLL	&&~yAAAr"   N)rX  rY  rZ  rY  r   r[  )rk   r   )r7   r   r   r   r  r	  r-   r"   r    rW  rW    sQ        ' ' ' '> > >B B B B B Br"   rW  r~   rW   rq   rr   rZ   r^   rs   rt   r\   r`   rx   ry   rv   rw   ru   rz   r{   r|   r}   bitwidthrX   signedc                6   | dk    rt           S | dk    r	|rt          S | dk    r	|st          S | dk    r	|rt          S | dk    r	|st          S | dk    r	|rt
          S | dk    r	|st          S | dk    r	|rt          S | dk    r	|st          S t          d|  d|           )Nr   r   r   r   r   zUnsupported bitwidth z and signedness )
rW   rq   rs   rr   rt   rZ   r\   r^   r`   r   )rf  rg  s     r    get_int_dtyperi    s    1}}	Q6	Qv	RF	R	RF	R	RF	RSSS6SSTTTr"   c                      e Zd ZdZdS )consta~  
    This class is used as a type annotation to mark pointers to constant data.
    The `store` function cannot be called with a pointer to const. Constness
    is part of the pointer type and the usual Triton type consistency rules
    apply. For example you cannot have a function that returns constant pointer
    in one return statement and non-constant pointer in another.
    N)r7   r   r   r6   r-   r"   r    rk  rk  .  s          	Dr"   rk  c                     e Zd ZdZd Zd-dZd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Z d! Z!d" Z"d# Z#d$ Z$d% Z%d& Z&d' Z'd( Z(d) Z)d* Z*d+ Z+d,S ).rh   zL
    This class is used to store a value that is known at compile-time.
    c                Z    t          |t                    r|j        | _        d S || _        d S r,   rU   rh   ri   )r.   ri   s     r    r   zconstexpr.__init__>  s,    eY'' 	DJJJDJJJr"   r   r  c                    d| j          dS )Nz
constexpr[]r?  r   s    r    r  zconstexpr.__repr__D  s    )DJ))))r"   c                    | j         S r,   r?  r   s    r    	__index__zconstexpr.__index__G  rR  r"   c                J    t          | j        t          |          z             S r,   rh   ri   _constexpr_to_valuer   s     r    __add__zconstexpr.__add__N       &9%&@&@@AAAr"   c                J    t          t          |          | j        z             S r,   rh   ru  ri   r   s     r    __radd__zconstexpr.__radd__Q       ,U33dj@AAAr"   c                J    t          | j        t          |          z
            S r,   rt  r   s     r    __sub__zconstexpr.__sub__T  rw  r"   c                J    t          t          |          | j        z
            S r,   ry  r   s     r    __rsub__zconstexpr.__rsub__W  r{  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __mul__zconstexpr.__mul__Z  rw  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __mod__zconstexpr.__mod__]  rw  r"   c                J    t          t          |          | j        z            S r,   ry  r   s     r    __rmul__zconstexpr.__rmul__`  r{  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __truediv__zconstexpr.__truediv__c  rw  r"   c                J    t          t          |          | j        z            S r,   ry  r   s     r    __rtruediv__zconstexpr.__rtruediv__f  r{  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __floordiv__zconstexpr.__floordiv__i       ':5'A'AABBBr"   c                J    t          t          |          | j        z            S r,   ry  r   s     r    __rfloordiv__zconstexpr.__rfloordiv__l  s     ,U33tzABBBr"   c                L    t          | j        t          |          k              S r,   rt  r   s     r    __gt__zconstexpr.__gt__o       &9%&@&@@AAAr"   c                L    t          t          |          | j        k              S r,   ry  r   s     r    __rgt__zconstexpr.__rgt__r       ,U33dj@AAAr"   c                L    t          | j        t          |          k              S r,   rt  r   s     r    __ge__zconstexpr.__ge__u       ':5'A'AABBBr"   c                L    t          t          |          | j        k              S r,   ry  r   s     r    __rge__zconstexpr.__rge__x       ,U33tzABBBr"   c                L    t          | j        t          |          k               S r,   rt  r   s     r    __lt__zconstexpr.__lt__{  r  r"   c                L    t          t          |          | j        k               S r,   ry  r   s     r    __rlt__zconstexpr.__rlt__~  r  r"   c                L    t          | j        t          |          k              S r,   rt  r   s     r    __le__zconstexpr.__le__  r  r"   c                L    t          t          |          | j        k              S r,   ry  r   s     r    __rle__zconstexpr.__rle__  r  r"   c                L    t          | j        t          |          k              S r,   rt  r   s     r    r   zconstexpr.__eq__  r  r"   c                L    t          | j        t          |          k              S r,   rt  r   s     r    r   zconstexpr.__ne__  r  r"   c                *    t          | j                  S r,   )rH   ri   r   s    r    __bool__zconstexpr.__bool__      DJr"   c                ,    t          | j                   S r,   rh   ri   r   s    r    __neg__zconstexpr.__neg__      $*%%%r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __and__zconstexpr.__and__  rw  r"   c                H    t          | j        ot          |                    S r,   rt  r   s     r    logical_andzconstexpr.logical_and  s     B(;E(B(BCCCr"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __or__zconstexpr.__or__  rw  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __xor__zconstexpr.__xor__  rw  r"   c                H    t          | j        pt          |                    S r,   rt  r   s     r    
logical_orzconstexpr.logical_or  s     A':5'A'ABBBr"   c                ,    t          | j        
           S r,   r  r   s    r    __pos__zconstexpr.__pos__  r  r"   c                ,    t          | j                   S r,   r  r   s    r    
__invert__zconstexpr.__invert__  r  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    __pow__zconstexpr.__pow__  s     %8%?%??@@@r"   c                J    t          t          |          | j        z            S r,   ry  r   s     r    __rpow__zconstexpr.__rpow__  s     ,U33TZ?@@@r"   c                J    t          | j        t          |          z	            S r,   rt  r   s     r    
__rshift__zconstexpr.__rshift__  r  r"   c                J    t          | j        t          |          z            S r,   rt  r   s     r    
__lshift__zconstexpr.__lshift__  r  r"   c                ,    t          | j                   S r,   r  r   s    r    __not__zconstexpr.__not__  s    TZ(((r"   c                *    t          | j                  S r,   )rD   ri   r   s    r    __iter__zconstexpr.__iter__  r  r"   c                     | j         |i |S r,   r?  )r.   r   kwdss      r    __call__zconstexpr.__call__  s    tz4(4(((r"   Nr  ),r7   r   r   r6   r   r  rr  rv  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-   r"   r    rh   rh   9  s          * * * *  B B BB B BB B BB B BB B BB B BB B BB B BB B BC C CC C CB B BB B BC C CC C CB B BB B BC C CC C CC C CC C C     & & &B B BD D DB B BB B BC C C& & && & &A A AA A AC C CC C C) ) )     ) ) ) ) )r"   rh   c           	         t          | t                    rVt          |t                    rC| j        j        j        }|j        |k    r)t          d|j         d| d| j         d           d S d S d S d S )NzValue z exceeds the maximum bitwidth (z) for type 'z)'. This may result in undefined behavior.)	rU   r=   rh   rj   r   r   ri   r   rp   )ri   shift_valuerf  s      r    check_bit_widthr    s    %   ZY%G%G :$7(( X*  X  X8  X  Xafal  X  X  X       ((r"   c                     e Zd ZdZdjdZdkdZedld	            Zedld
            Zedld            Z	edld            Z
edld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld            Zedld             Zedld!            Zedld"            Z edld#            Z!edld$            Z"edld%            Z#edld&            Z$edld'            Z%edld(            Z&edld)            Z'edld*            Z(edld+            Z)edld,            Z*edld-            Z+edld.            Z,edld/            Z-edld0            Z.e/d1             Z0edmdnd7            Z1dod8Z2dod9Z3dod:Z4dpd<Z5dod=Z6dod>Z7dod?Z8dqdod@Z9drdodCZ:dodDZ;dsdodEZ<dtdodFZ=dtdodGZ>dtdodHZ?dtdodIZ@dtdodJZAdtdodKZBdtdodLZCdodMZDdodNZEdodOZFdodPZGdodQZHdodRZIdodSZJdudodTZKdudodUZLdodVZMdodWZNdodXZOdudodYZPdodZZQdvdod\ZRdwdod]ZSdvdod^ZTdwdod_ZUdqdod`ZVdqdodaZWdxdodcZXdxdoddZYdeZfdydhZ[dldodiZ\dS )zr=   a  Represents an N-dimensional array of values or pointers.

    :code:`tensor` is the fundamental data structure in Triton programs.  Most
    functions in :py:mod:`triton.language` operate on and return tensors.

    Most of the named member functions here are duplicates of the free functions
    in :code:`triton.language`.  For example, :code:`triton.language.sqrt(x)` is
    equivalent to :code:`x.sqrt()`.

    :code:`tensor` also defines most of the magic/dunder methods, so you can
    write :code:`x+y`, :code:`x << 2`, etc.

    .. rubric:: Constructors
    ..
       For some reason Sphinx includes __init__ before printing the full table
       of methods.  Not what I want, but I can't figure out how to fix it.  Give
       it its own section so it looks intentional. :)
    rj   rp   c                   || _         |                                r|j        nd| _        d| _        | j        D ]}| xj        |z  c_        t	          | j                  | _        || _        |j        | _        d | j        D             | _        dS )zNot called by user code.r-   r   c                ,    g | ]}t          |          S r-   rh   r@  s     r    rC  z#tensor.__init__.<locals>.<listcomp>  s    777qill777r"   N)handler   r<  rG  rh   rj   r   rp   )r.   r  rj   rB  s       r    r   ztensor.__init__  s     #'==??:TZZ

 	 	AJJ!OJJJtz**
	[
77DJ777


r"   r   r  c                    t          | j                  dz   d                    d | j        D                       z   dz   S )N[rF  c              3  4   K   | ]}t          |          V  d S r,   )r  r@  s     r    	<genexpr>z!tensor.__str__.<locals>.<genexpr>  s(      0L0LAQ0L0L0L0L0L0Lr"   rp  )r  rp   joinr<  r   s    r    r  ztensor.__str__  s<    4:$tyy0L0L0L0L0L'L'LLsRRr"   Nc                N    t          ||          }t          j        | ||          S r,   )rN   r   addr.   r   r   s      r    rv  ztensor.__add__  %    5(++|D%222r"   c                0    |                      ||          S Nr   )rv  r  s      r    rz  ztensor.__radd__      ||EH|555r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   subr  s      r    r}  ztensor.__sub__  r  r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rsub__  %    5(++|E4222r"   c                N    t          ||          }t          j        | ||          S r,   )rN   r   mulr  s      r    r  ztensor.__mul__  r  r"   c                0    |                      ||          S r  )r  r  s      r    r  ztensor.__rmul__	  r  r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   truedivr  s      r    r  ztensor.__truediv__  s&    5(++eX666r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rtruediv__  s&    5(++tX666r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   floordivr  s      r    r  ztensor.__floordiv__  s&    5(++ uh777r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rfloordiv__  s&    5(++ h777r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   modr  s      r    r  ztensor.__mod__!  r  r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    __rmod__ztensor.__rmod__&  r  r"   c                ,    t          j        | |          S r,   )r   minusr.   r   s     r    r  ztensor.__neg__,  s    ~dH---r"   c                ,    t          j        | |          S r,   )r   invertr  s     r    r  ztensor.__invert__0  s    tX...r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   and_r  s      r    r  ztensor.__and__6  %    5(++}T5(333r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    __rand__ztensor.__rand__;  %    5(++}UD(333r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   or_r  s      r    r  ztensor.__or__@  r  r"   c                N    t          ||          }t          j        || |          S r,   r   r  s      r    __ror__ztensor.__ror__E  r  r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   xor_r  s      r    r  ztensor.__xor__J  r  r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    __rxor__ztensor.__rxor__O  r  r"   c                n    t          | |           t          ||          }t          j        | ||          S r,   r  rN   r   shlr  s      r    r  ztensor.__lshift__T  s5    e$$$5(++|D%222r"   c                n    t          ||            t          ||          }t          j        || |          S r,   r
  r  s      r    __rlshift__ztensor.__rlshift__Z  s5    t$$$5(++|E4222r"   c                    t          | |           t          ||          }| j                                        rt	          j        | ||          S t	          j        | ||          S r,   r  rN   rp   r   r   ashrlshrr  s      r    r  ztensor.__rshift__`  s_    e$$$5(++:##%% 	8=uh777=uh777r"   c                    t          ||            t          ||          }| j                                        rt	          j        || |          S t	          j        || |          S r,   r  r  s      r    __rrshift__ztensor.__rrshift__i  s_    t$$$5(++:##%% 	8=h777=h777r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   greater_thanr  s      r    r  ztensor.__gt__s  s&    5(++$T5(;;;r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rgt__x  s&    5(++$UD(;;;r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   greater_equalr  s      r    r  ztensor.__ge__~  s&    5(++%dE8<<<r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rge__  s&    5(++%eT8<<<r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   	less_thanr  s      r    r  ztensor.__lt__  &    5(++!$x888r"   c                N    t          ||          }t          j        || |          S r,   r  r  s      r    r  ztensor.__rlt__  &    5(++!%x888r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   
less_equalr  s      r    r  ztensor.__le__  &    5(++"4999r"   c                N    t          ||          }t          j        || |          S r,   r#  r  s      r    r  ztensor.__rle__  s&    5(++"5$999r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   equalr  s      r    r   ztensor.__eq__  s%    5(++~dE8444r"   c                N    t          ||          }t          j        || |          S r,   r(  r  s      r    __req__ztensor.__req__  s%    5(++~eT8444r"   c                N    t          ||          }t          j        | ||          S r,   rN   r   	not_equalr  s      r    r   ztensor.__ne__  r  r"   c                N    t          ||          }t          j        || |          S r,   r-  r  s      r    __rne__ztensor.__rne__  r!  r"   c                N    t          ||          }t          j        | ||          S r,   )rN   r   r  r  s      r    r  ztensor.logical_and  s&    5(++#D%:::r"   c                N    t          ||          }t          j        | ||          S r,   )rN   r   r  r  s      r    r  ztensor.logical_or  r%  r"   c                ,    t          j        | |          S r,   )r   not_r  s     r    r  ztensor.__not__  s    }T8,,,r"   c                X   t          |t          t          f          s||g}| }t          |          D ]v\  }}|t          |t                    r|j        t          j        |||          }:t          |t                    r|j        |j        |j	        et          d|           |S )Nzunsupported tensor index: )rU   slicerh   	enumerateri   r   expand_dimsstartstopstepr   )r.   slicesr   retdimsls         r    __getitem__ztensor.__getitem__  s    fui011 	V^XF (( 	D 	DGCzZI66z28;K*3X>>B&& D28+;TVT[Tc !Bb!B!BCCC
r"   c                    J d            )zTransposes a 2D tensor.Fz0Transposition must be created by the AST Visitorr-   r   s    r    r   ztensor.T  s     	IHHHHr"   Ffp_downcast_roundingOptional[str]bitcastrH   c                    t          |t                    r|j        }|rt          j        | ||          S t          j        | |||          S )z3
        Alias for :py:func:`tensor.cast`.
        )rU   rh   ri   r   rD  cast)r.   rp   rB  rD  r   s        r    toz	tensor.to  sQ     gy)) 	$mG 	;#D%:::}T5(4HIIIr"   c                    d S r,   r-   r.   r<  s     r    broadcast_toztensor.broadcast_to      r"   c                    d S r,   r-   r.   dimss     r    transztensor.trans  rK  r"   c                    d S r,   r-   rM  s     r    permuteztensor.permute  rK  r"   tuple[tensor, tensor]c                    d S r,   r-   r   s    r    r   ztensor.split  rK  r"   c                    d S r,   r-   rI  s     r    viewztensor.view  rK  r"   c                    d S r,   r-   rI  s     r    reshapeztensor.reshape  rK  r"   c                    d S r,   r-   )r.   axiss     r    r8  ztensor.expand_dims  rK  r"   c                    d S r,   r-   )r.   rp   rB  rD  s       r    rF  ztensor.cast  rK  r"   r-   r)   c                    d S r,   r-   )r.   ri   maskboundary_checkcache_modifiereviction_policys         r    storeztensor.store   rK  r"   c                    d S r,   r-   )r.   offsetss     r    advanceztensor.advance  rK  r"   c                    d S r,   r-   )r.   cmpvalsemscopes        r    
atomic_casztensor.atomic_cas  rK  r"   c                    d S r,   r-   r.   rf  r\  rg  rh  s        r    atomic_xchgztensor.atomic_xchg	  rK  r"   c                    d S r,   r-   rk  s        r    
atomic_addztensor.atomic_add  rK  r"   c                    d S r,   r-   rk  s        r    
atomic_maxztensor.atomic_max  rK  r"   c                    d S r,   r-   rk  s        r    
atomic_minztensor.atomic_min  rK  r"   c                    d S r,   r-   rk  s        r    
atomic_andztensor.atomic_and  rK  r"   c                    d S r,   r-   rk  s        r    	atomic_orztensor.atomic_or  rK  r"   c                    d S r,   r-   rk  s        r    
atomic_xorztensor.atomic_xor  rK  r"   c                    d S r,   r-   r   s    r    expz
tensor.exp  rK  r"   c                    d S r,   r-   r   s    r    logz
tensor.log!  rK  r"   c                    d S r,   r-   r   s    r    cosz
tensor.cos$  rK  r"   c                    d S r,   r-   r   s    r    sinz
tensor.sin'  rK  r"   c                    d S r,   r-   r   s    r    sqrtztensor.sqrt*  rK  r"   c                    d S r,   r-   r   s    r    rsqrtztensor.rsqrt-  rK  r"   c                    d S r,   r-   r   s    r    rS   z
tensor.abs0  rK  r"   c                    d S r,   r-   )r.   rY  
combine_fn	keep_dimss       r    reduceztensor.reduce3  rK  r"   c                    d S r,   r-   )r.   rY  r  reverses       r    associative_scanztensor.associative_scan6  rK  r"   c                    d S r,   r-   )r.   num_binss     r    	histogramztensor.histogram9  rK  r"   c                    d S r,   r-   )r.   divs     r    cdivztensor.cdiv<  rK  r"   c                    d S r,   r-   r   s    r    sigmoidztensor.sigmoid?  rK  r"   c                    d S r,   r-   )r.   ieee_roundings     r    softmaxztensor.softmaxB  rK  r"   c                    d S r,   r-   r   s    r    ravelztensor.ravelE  rK  r"   Tc                    d S r,   r-   r.   rY  return_indicesreturn_indices_tie_break_leftr  s        r    maxz
tensor.maxH  rK  r"   c                    d S r,   r-   r.   rY  tie_break_leftr  s       r    argmaxztensor.argmaxK  rK  r"   c                    d S r,   r-   r  s        r    minz
tensor.minN  rK  r"   c                    d S r,   r-   r  s       r    argminztensor.argminQ  rK  r"   c                    d S r,   r-   r.   rY  r  s      r    sumz
tensor.sumT  rK  r"   c                    d S r,   r-   r  s      r    xor_sumztensor.xor_sumW  rK  r"   r   c                    d S r,   r-   r.   rY  r  s      r    cumsumztensor.cumsumZ  rK  r"   c                    d S r,   r-   r  s      r    cumprodztensor.cumprod]  rK  r"   r>  rh   
descendingc                    d S r,   r-   )r.   r>  r  s      r    sortztensor.sort`  rK  r"   c                    d S r,   r-   )r.   r>  s     r    flipztensor.flipc  rK  r"   )rj   rp   r  r,   NFNrp   rp   rB  rC  rD  rH   )r   r=   r   rR  r   )Nr-   r)   r)   NNNNNF)NFTF)TF)r   F)r>  rh   r  rh   r   r=   )]r7   r   r   r6   r   r  r&   rv  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r+  r   r0  r  r  r  r@  r  r   rG  rJ  rO  rQ  r   rU  rW  r8  rF  r`  rc  ri  rl  rn  rp  rr  rt  rv  rx  rz  r|  r~  r  r  r  rS   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  CONSTEXPR_0r  r  r-   r"   r    r=   r=     sT	        &8 8 8 8S S S S 3 3 3 W3 6 6 6 W6 3 3 3 W3 3 3 3 W3 3 3 3 W3 6 6 6 W6 7 7 7 W7 7 7 7 W7 8 8 8 W8 8 8 8 W8 3 3 3 W3 3 3 3 W3
 . . . W. / / / W/
 4 4 4 W4 4 4 4 W4 3 3 3 W3 3 3 3 W3 4 4 4 W4 4 4 4 W4 3 3 3 W3
 3 3 3 W3
 8 8 8 W8 8 8 8 W8 < < < W< < < < W<
 = = = W= = = = W=
 9 9 9 W9 9 9 9 W9
 : : : W: : : : W:
 5 5 5 W5 5 5 5 W5 9 9 9 W9 9 9 9 W9 ; ; ; W; : : : W: - - - W-    W I I XI 
J 
J 
J 
J W
J"                                                                                                                                              %)+           r"   r=   c                P    t          j        | d          }|dk    p|dk    p|dk    S )N01trueon)osgetenv)var_namevs     r    get_bool_env_varr  g  s/    
	(C  A8/qF{/a4i/r"   c                >    t          | t                    r| j        S | S r,   rn  )r  s    r    ru  ru  o  s     !Y wHr"   c                J    t          |           } t          j        | |          S )z
    Returns the id of the current program instance along the given :code:`axis`.

    :param axis: The axis of the 3D launch grid. Must be 0, 1 or 2.
    :type axis: int
    )ru  r   
program_idrY  r   s     r    r  r  u  s$     t$$DtX...r"   c                J    t          |           } t          j        | |          S )z
    Returns the number of program instances launched along the given :code:`axis`.

    :param axis: The axis of the 3D launch grid. Must be 0, 1 or 2.
    :type axis: int
    )ru  r   num_programsr  s     r    r  r    s$     t$$D x000r"   c                j    t          |           } t          |          }t          j        | ||          S )a  
    Returns contiguous values within the half-open interval :code:`[start,
    end)`.  :code:`end - start` must be less than or equal to
    :code:`TRITON_MAX_TENSOR_NUMEL = 131072`

    :param start: Start of the interval. Must be a power of two.
    :type start: int32
    :param end: End of the interval. Must be a power of two greater than
        :code:`start`.
    :type end: int32
    )ru  r   arange)r9  endr   s      r    r  r    s2      &&E
c
"
"C?5#x000r"   c           	        t          |           } t          |           D ]\  }}t          |t                    rt	          |          }t          |t                    st          d| d          t          |j        t                    s(t          d| dt          |j                   d          |j        |j        dz
  z  dk    rt          d| d          d | D             S )	NzShape element  must have type `constexpr`1 must have type `constexpr[int]`, got `constexpr[rp  r   r   z must be a power of 2c                ,    g | ]}t          |          S r-   )ru  rA  rF   s     r    rC  z%_shape_check_impl.<locals>.<listcomp>  s!    222q""222r"   )	ru  r7  rU   rX   rh   rE   ri   rj   r   )r<  ids      r    _shape_check_implr    s   &&E%   H H1a 	!A!Y'' 	MKQKKKLLL!'3'' 	sqQqqaefgfmananqqqrrr7agk"a''FaFFFGGG (22E2222r"   c                    t          |           } t          |          }t          |          }t          j        | |||          S )a]  
    Returns a tensor filled with the scalar value for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :value value: A scalar value to fill the array with
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    )r  ru  r   full)r<  ri   rp   r   s       r    r  r    sA     e$$E&&E&&E=uh777r"   c                .    t          j        | ||          S )z
    Tries to broadcast the two given blocks to a common compatible shape.

    :param input: The first input tensor.
    :type input: Block
    :param other: The second input tensor.
    :type other: Block
    )r   broadcast_impl_value)inputr   r   s      r    	broadcastr    s     (x@@@r"   r  c               f    t          t          |                    }t          j        | ||          S )ae  
    Tries to broadcast the given tensor to a new :code:`shape`.

    :param input: The input tensor.
    :type input: Block
    :param shape: The desired shape.
    :type shape:

    :code:`shape` can be passed as a tuple or as individual parameters: ::

        # These are equivalent
        broadcast_to(x, (32, 32))
        broadcast_to(x, 32, 32)
    )r  rG   r   broadcast_impl_shaper  r   r<  s      r    rJ  rJ    s/    " .u5566E(x@@@r"   r  c               6    |sd}t          j        | ||          S )ap  
    Permutes the dimensions of a tensor.

    If no permutation is specified, tries to do a (1,0) permutation, i.e. tries
    to transpose a 2D tensor.

    :param input: The input tensor.
    :param dims: The desired ordering of dimensions.  For example,
        :code:`(2, 1, 0)` reverses the order dims in a a 3D tensor.

    :code:`dims` can be passed as a tuple or as individual parameters: ::

        # These are equivalent
        trans(x, (2, 1, 0))
        trans(x, 2, 1, 0)

    :py:func:`permute` is equivalent to this function, except it doesn't
    have the special case when no permutation is specified.
    )r   r   )r   rQ  r  r   rN  s      r    rO  rO    s&    ,  E4222r"   c               L    t          |          }t          j        | ||          S )a  
    Permutes the dimensions of a tensor.

    :param input: The input tensor.
    :type input: Block
    :param dims: The desired ordering of dimensions.  For example,
        :code:`(2, 1, 0)` reverses the order dims in a a 3D tensor.

    :code:`dims` can be passed as a tuple or as individual parameters: ::

        # These are equivalent
        permute(x, (2, 1, 0))
        permute(x, 2, 1, 0)

    :py:func:`trans` is equivalent to this function, except when
    :code:`dims` is empty, it tries to do a (1,0) permutation.
    )rG   r   rQ  r  s      r    rQ  rQ    s&    ( D!!DE4222r"   Fc                0    t          j        | |||          S )av  
    Concatenate the given blocks

    :param input: The first input tensor.
    :type input:
    :param other: The second input tensor.
    :type other:
    :param reorder: Compiler hint. If true, the compiler is
        allowed to reorder elements while concatenating inputs.  Only use if the
        order does not matter (e.g., result is only used in reduction ops)
    )r   cat)r  r   can_reorderr   s       r    r  r  $  s     <uk8<<<r"   c                .    t          j        | ||          S )aZ  
    Join the given tensors in a new, minor dimension.

    For example, given two tensors of shape (4,8), produces a new tensor of
    shape (4,8,2).  Given two scalars, returns a tensor of shape (2).

    The two inputs are broadcasted to be the same shape.

    If you want to join more than two elements, you can use multiple calls to
    this function.  This reflects the constraint in Triton that tensors must
    have power-of-two sizes.

    join is the inverse of split.

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    )r   r  )abr   s      r    r  r  4  s    * =Ax(((r"   c                    | S r,   r-   )r  r  s     r    _take_firstr  L  s    Hr"   rR  c           
     b   t          | j                  dk    }|rt          j        | d|          } t          j        | |          \  }}|rbt          j        t          t          |dt          ||                    }t          j        t          t          |dt          ||                    }||fS )a  
    Split a tensor in two along its last dim, which must have size 2.

    For example, given a tensor of shape (4,8,2), produces two tensors of shape
    (4,8).  Given a tensor of shape (2), returns two scalars.

    If you want to split into more than two pieces, you can use multiple calls
    to this function (probably plus calling reshape).  This reflects the
    constraint in Triton that tensors must have power-of-two sizes.

    split is the inverse of join.

    :param a: The tensor to split.
    :type a: Tensor
    r   r   Nr   r(   )
r4   r<  r   r8  r   typingrF  r=   r  r  )r  r   r(   
was_rank_1out_lhsout_rhss         r    r   r   Q  s    * QW"J 1 Ax00~a22GW t+ffWdKRZgq&r&r&rss+ffWdKRZgq&r&r&rssGr"   c                   t          d           t          t          |                    }t          j        | |d|          S )a  
    Returns a tensor with the same elements as `input` but a different shape.
    The order of the elements may not be preserved.

    :param input: The input tensor.
    :type input: Block
    :param shape: The desired shape.

    :code:`shape` can be passed as a tuple or as individual parameters: ::

        # These are equivalent
        view(x, (32, 32))
        view(x, 32, 32)
    zCview is deprecated, please use reshape with can_reorder being true.T)r  rk   )r   r  rG   r   rW  r  s      r    rU  rU  t  sC    " 		NOOO.u5566EE5dHMMMMr"   )r  r   c               h    t          t          |                    }t          j        | |||          S )ag  
    Returns a tensor with the same number of elements as input but with the
    provided shape.

    :param input: The input tensor.
    :type input: Block
    :param shape: The new shape.

    :code:`shape ` can be passed as a tuple or as individual parameters: ::

        # These are equivalent
        reshape(x, (32, 32))
        reshape(x, 32, 32)
    )r  rG   r   rW  )r  r  r   r<  s       r    rW  rW    s1    " .u5566EE5+x@@@r"   c                n    | | cxk    r|k     sn t          d|  d|  d|           | dk    r| n| |z   S )Nzinvalid axis z. Expected z <= axis < r   r   )rY  ndims     r    
_wrap_axisr    sb    ET    D    RRR4%RRDRRSSS19944$+-r"   c                   t          | |          } t          |          }t          |t                    rt	          |          n|g}t          | j                  t          |          z   fd|D             }t          t          |                    t          |          k    rt          d|           | }t          |          D ]}t          j        |||          }|S )aR  
    Expand the shape of a tensor, by inserting new length-1 dimensions.

    Axis indices are with respect to the resulting tensor, so
    ``result.shape[axis]`` will be 1 for each axis.

    :param input: The input tensor.
    :type input: tl.tensor
    :param axis: The indices to add new axes
    :type axis: int | Sequence[int]

    c                J    g | ]}t          t          |                     S r-   )r  ru  )rA  r  new_ndims     r    rC  zexpand_dims.<locals>.<listcomp>  s,    GGGQJ*1--x88GGGr"   z7expand_dims received duplicate axes, normalized axes = )rN   ru  rU   r   r8   r4   r<  setr   sortedr   r8  )r  rY  r   axesr=  r  r  s         @r    r8  r8    s     uh''Et$$D#D(33?4:::$D5;#d))+HGGGG$GGGD
3t99~~T""YSWYYZZZ
CD\\ 5 5"3844Jr"   rB  rC  rD  c                    t          | |          } t          |t                    r|j        }|rt	          j        | ||          S t	          j        | |||          S )a@  
    Casts a tensor to the given :code:`dtype`.

    :param dtype: The target data type.
    :param fp_downcast_rounding: The rounding mode for downcasting
        floating-point values.  This parameter is only used when self is a
        floating-point tensor and dtype is a floating-point type with a
        smaller bitwidth. Supported values are :code:`"rtne"` (round to
        nearest, ties to even) and :code:`"rtz"` (round towards zero).
    :param bitcast: If true, the tensor is bitcasted to the given
        :code:`dtype`, instead of being numerically casted.
    )rN   rU   rh   ri   r   rD  rF  )r  rp   rB  rD  r   s        r    rF  rF    s`     uh''E'9%%  - 8uh777=x1EFFFr"   c           	        ||
J d            |/|od|j         j        v }|r|s|dnd}	t          j        d|	          }t	          |          }t	          |          }t	          |          }t          j        | ||||||          S )a  
    Returns the matrix product of two blocks.

    The two blocks must be two-dimensional and have compatible inner dimensions.

    :param input: The first tensor to be multiplied.
    :type input: 2D tensor of scalar-type in {:code:`int8`, :code: `float8_e5m2`, :code:`float16`, :code:`bfloat16`, :code:`float32`}
    :param other: The second tensor to be multiplied.
    :type other: 2D tensor of scalar-type in {:code:`int8`, :code: `float8_e5m2`, :code:`float16`, :code:`bfloat16`, :code:`float32`}
    :param input_precision: How to exercise the Tensor Cores for f32 x f32. If
      the device does not have Tensor Cores or the inputs are not of dtype f32,
      this option is ignored.  For devices that do have tensor cores, the
      default precision is tf32.
    :type input_precision: string. Available options for nvidia: :code:`"tf32"`, :code:`"tf32x3"`, :code:`"ieee"`. Default: :code:`"tf32"`. Avaliable options for amd: :code:`"ieee"`.
    :param allow_tf32: *Deprecated.*  If true, input_precision is set to "tf32".
      Only one of :code:`input_precision` and :code:`allow_tf32` can be
      specified (i.e. at least one must be :code:`None`).
    Nz;Only one of input_precision and allow_tf32 can be specifiedtf32ieeeTRITON_F32_DEFAULT)optionsallowed_dot_input_precisionsr  r  ru  r   dot)
r  r   accinput_precision
allow_tf32max_num_imprecise_acc	out_dtyper   supports_tf32default_precisions
             r    r  r    s    * "j&8&8:w&8&8&8 \Vx/?/\%\'4h*h
HZFFbh)$8:KLL)/::O#I..I/0EFF<uc?<QS\^fgggr"   r-   r)   c	                6   t          |          }t          |          }|t          ||          }|t          ||          }t          |          }t          |          }t          |          }t          |          }t          j        | ||||||||	  	        S )a  
    Return a tensor of data whose values are loaded from memory at location defined by `pointer`:

        (1) If `pointer` is a single element pointer, a scalar is be loaded.  In
            this case:

            - `mask` and `other` must also be scalars,
            - `other` is implicitly typecast to `pointer.dtype.element_ty`, and
            - `boundary_check` and `padding_option` must be empty.

        (2) If `pointer` is an N-dimensional tensor of pointers, an
            N-dimensional tensor is loaded.  In this case:

            - `mask` and `other` are implicitly broadcast to `pointer.shape`,
            - `other` is implicitly typecast to `pointer.dtype.element_ty`, and
            - `boundary_check` and `padding_option` must be empty.

        (3) If `pointer` is a block pointer defined by `make_block_ptr`, a
            tensor is loaded.  In this case:

            - `mask` and `other` must be None, and
            - `boundary_check` and `padding_option` can be specified to control
               the behavior of out-of-bound access.

    :param pointer: Pointer to the data to be loaded
    :type pointer: `triton.PointerType`, or block of `dtype=triton.PointerType`
    :param mask: if `mask[idx]` is false, do not load the data at address `pointer[idx]`
        (must be `None` with block pointers)
    :type mask: Block of `triton.int1`, optional
    :param other: if `mask[idx]` is false, return `other[idx]`
    :type other: Block, optional
    :param boundary_check: tuple of integers, indicating the dimensions which should do the boundary check
    :type boundary_check: tuple of ints, optional
    :param padding_option: should be one of {"", "zero", "nan"}, do padding while out of bound
    :param cache_modifier: changes cache option in NVIDIA PTX
    :type cache_modifier: str, optional
    :param eviction_policy: changes eviction policy in NVIDIA PTX
    :type eviction_policy: str, optional
    :param volatile: changes volatile option in NVIDIA PTX
    :type volatile: bool, optional
    )ru  rN   r   load)	pointerr\  r   r]  padding_optionr^  r_  volatiler   s	            r    r  r    s    Z t$$D&&E$))5(++(88N(88N)/::O"8,,H=$~~~_n!8- - -r"   c                T    t          ||          }t          j        | |dd||          S )a  
    Experimental feature to access TMA descriptors loads. This is an escape hatch to easily exercise TTGIR operations.
    This will be removed in the future and shouldn't be used in production code.

    This loads a tensor of data based on the descriptor and offsets.
    r)   )r;  r   descriptor_load)desc_pointerrb  r<  rp   r   rj   s         r    _experimental_descriptor_loadr  A  s.     eU##D#L'2r4RRRr"   c                0    t          j        | |||          S )a  
    Experimental feature to access TMA descriptors stores. This is an escape hatch to easily exercise TTGIR operations.
    This will be removed in the future and shouldn't be used in production code.

    This stores a tensor of data based on the descriptor and offsets.
    )r   descriptor_store)r  ri   rb  r   s       r    _experimental_descriptor_storer  M  s     $\5'8LLLr"   c           	         t          ||          }t          |          }|t          ||          }t          |          }t          |          }t          j        | ||||||          S )ae  
    Store a tensor of data into memory locations defined by `pointer`.

        (1) If `pointer` is a single element pointer, a scalar is stored.  In
            this case:

            - `mask` must also be scalar, and
            - `boundary_check` and `padding_option` must be empty.

        (2) If `pointer` is an N-dimensional tensor of pointers, an
            N-dimensional block is stored.  In this case:

            - `mask` is implicitly broadcast to `pointer.shape`, and
            - `boundary_check` must be empty.

        (3) If `pointer` is a block pointer defined by `make_block_ptr`, a block
            of data is stored.  In this case:

            - `mask` must be None, and
            - `boundary_check` can be specified to control the behavior of out-of-bound access.

    `value` is implicitly broadcast to `pointer.shape` and typecast to `pointer.dtype.element_ty`.

    :param pointer: The memory location where the elements of `value` are stored
    :type pointer: `triton.PointerType`, or block of `dtype=triton.PointerType`
    :param value: The tensor of elements to be stored
    :type value: Block
    :param mask: If `mask[idx]` is false, do not store `value[idx]` at `pointer[idx]`
    :type mask: Block of triton.int1, optional
    :param boundary_check: tuple of integers, indicating the dimensions which should do the boundary check
    :type boundary_check: tuple of ints, optional
    :param cache_modifier: changes cache option in NVIDIA PTX
    :type cache_modifier: str, optional
    :param eviction_policy: changes eviction policy in NVIDIA PTX
    :type eviction_policy: str, optional
    )rN   ru  r   r`  )r  ri   r\  r]  r^  r_  r   s          r    r`  r`  X  sn    P uh''Et$$D$))(88N)/::O>'5$P_aijjjr"   basec           	     6    t          j        | ||||||          S )ak  
    Returns a pointer to a block in a parent tensor

    :param base: The base pointer to the parent tensor
    :param shape: The shape of the parent tensor
    :param strides: The strides of the parent tensor
    :param offsets: The offsets to the block
    :param block_shape: The shape of the block
    :param order: The order of the original data format
    )r   make_block_ptr)r  r<  stridesrb  block_shapeorderr   s          r    r  r    s"     "4+uV^___r"   c                .    t          j        | ||          S )z
    Advance a block pointer

    :param base: the block pointer to advance
    :param offsets: the offsets to advance, a tuple by dimension
    )r   rc  )r  rb  r   s      r    rc  rc    s     D'8444r"   r0   r  has_cmpCallable[[T], T]c                     d fd}|S )Nfuncr   r   c                :    d d}r|dz  }|dz  }|| _         | S )Nz
    Performs an atomic z at the memory location specified by :code:`pointer`.

    Return the data stored at :code:`pointer` before the atomic operation.

    :param pointer: The memory locations to operate on
    :type pointer: Block of dtype=triton.PointerDTypez|
    :param cmp: The values expected to be found in the atomic object
    :type cmp: Block of dtype=pointer.dtype.element_tya  
    :param val: The values with which to perform the atomic operation
    :type val: Block of dtype=pointer.dtype.element_ty
    :param sem: Memory semantics to use ("ACQUIRE_RELEASE" (default),
        "ACQUIRE", "RELEASE", or "RELAXED")
    :type sem: str
    :param scope: Scope of threads that observe synchronizing effect of the
        atomic operation ("GPU" (default), "CTA", or "SYSTEM")
    :type scope: str
    )r6   )r#  docstrr   r0   s     r    
_decoratorz&_add_atomic_docstr.<locals>._decorator  sT    99 9 9  	: : :F 	 	 		 r"   r#  r   r   r   r-   )r0   r   r&  s   `` r    _add_atomic_docstrr(    s/          2 r"   zcompare-and-swapT)r   c                    t          ||          }t          ||          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   ri  )r  re  rf  rg  rh  r   s         r    ri  ri    sW     S(
#
#C
S(
#
#C
c
"
"C&&EwS#uhGGGr"   exchangec                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rl  r  rf  r\  rg  rh  r   s         r    rl  rl    sU     S(
#
#C
c
"
"C&&Et$$DdCIIIr"   r  c                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rn  r,  s         r    rn  rn    U     S(
#
#C
c
"
"C&&Et$$DwT3xHHHr"   r  c                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rp  r,  s         r    rp  rp    r.  r"   r  c                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rr  r,  s         r    rr  rr    r.  r"   zlogical andc                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rt  r,  s         r    rt  rt    r.  r"   z
logical orc                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rv  r,  s         r    rv  rv  	  sU     S(
#
#C
c
"
"C&&Et$$DgsD#uhGGGr"   zlogical xorc                    t          ||          }t          |          }t          |          }t          |          }t          j        | |||||          S r,   )rN   ru  r   rx  r,  s         r    rx  rx    r.  r"   c                    t          | |          } t          ||          }t          ||          }t          j        | |||          S )a  
    Returns a tensor of elements from either :code:`x` or :code:`y`, depending on :code:`condition`.

    Note that :code:`x` and :code:`y` are always evaluated regardless of the value of :code:`condition`.

    If you want to avoid unintended memory operations, use the :code:`mask` arguments in `triton.load` and `triton.store` instead.

    The shape of :code:`x` and :code:`y` are both broadcast to the shape of :code:`condition`.
    :code:`x` and :code:`y` must have the same data type.

    :param condition: When True (nonzero), yield x, otherwise yield y.
    :type condition: Block of triton.bool
    :param x: values selected at indices where condition is True.
    :param y: values selected at indices where condition is False.
    )rN   r   where)	conditionrF   yr   s       r    r5  r5  $  sG    " 9h//I1hA1hA>)Q8444r"   propagate_nanc                    t          | |          } t          ||          }t          | |          } t          ||          }t          |          }t          j        | |||          S )aH  
    Computes the element-wise minimum of :code:`x` and :code:`y`.

    :param x: the first input tensor
    :type x: Block
    :param y: the second input tensor
    :type y: Block
    :param propagate_nan: whether to propagate NaN values.
    :type propagate_nan: tl.PropagateNan

    .. seealso:: :class:`tl.PropagateNan`
    r  )rN   _promote_bfloat16_to_float32ru  r   minimumrF   r7  r8  r   s       r    r;  r;  @  j     	1hA1hA$Q:::A$Q:::A'66MAq-:::r"   c                    t          | |          } t          ||          }t          | |          } t          ||          }t          |          }t          j        | |||          S )aH  
    Computes the element-wise maximum of :code:`x` and :code:`y`.

    :param x: the first input tensor
    :type x: Block
    :param y: the second input tensor
    :type y: Block
    :param propagate_nan: whether to propagate NaN values.
    :type propagate_nan: tl.PropagateNan

    .. seealso:: :class:`tl.PropagateNan`
    r  )rN   r:  ru  r   maximumr<  s       r    r?  r?  V  r=  r"   c                   t          | |          } t          ||          }t          ||          }t          | |          } t          ||          }t          ||          }t          |          }t          j        | ||||          S )a<  
    Clamps the input tensor :code:`x` within the range [min, max].
    Behavior when :code:`min` > :code:`max` is undefined.

    :param x: the input tensor
    :type x: Block
    :param min: the lower bound for clamping
    :type min: Block
    :param max: the upper bound for clamping
    :type max: Block
    :param propagate_nan: whether to propagate NaN values. Applies only to the :code:`x` tensor.
        If either :code:`min` or :code:`max` is NaN, the result is undefined.
    :type propagate_nan: tl.PropagateNan

    .. seealso:: :class:`tl.PropagateNan`
    r  )rN   r:  ru  r   clamp)rF   r  r  r8  r   s        r    rA  rA  l  s    $ 	1hA
S(
#
#C
S(
#
#C$Q:::A
&sX
>
>
>C
&sX
>
>
>C'66M>!S#}h???r"   return_indices_argtie_break_argc                     d fd}|S )Nr#  r   r   c                t    d}|d d dz  }	|d dz  }|                               | _        | S )Na  
    Returns the {name} of all elements in the :code:`input` tensor along the provided :code:`axis`

    :param input: the input values
    :param axis: the dimension along which the reduction should be done
    :param keep_dims: if true, keep the reduced dimensions with length 1z
    :param z-: if true, return index corresponding to the z valuezR: if true, return the left-most indices in case of ties for values that aren't NaNr/   formatr6   )r#  r%  r0   rB  rC  s     r    r&  z)_add_reduction_docstr.<locals>._decorator  s    L ) \\ \MQ\ \ \ \F$ pp p p pF }}$}//r"   r'  r-   )r0   rB  rC  r&  s   ``` r    _add_reduction_docstrrH    s5           " r"   c              #  d   K   |                                  }d V  |                     |           d S r,   )get_insertion_pointrestore_insertion_point)rk   ips     r    _insertion_guardrM    s:      		$	$	&	&B	EEE##B'''''r"   c                    t           t                    rt           f|          d         S  fd}fdt                    t          |          }(t	          t           d         j                            t          j         |          }|r;t          fd|D                       }nt           fd|D                       }|S )a  Applies the combine_fn to all elements in :code:`input` tensors along the provided :code:`axis`

    :param input: the input tensor, or tuple of tensors
    :param axis: the dimension along which the reduction should be done. If None, reduce all dimensions
    :param combine_fn: a function to combine two groups of scalar tensors (must be marked with @triton.jit)
    :param keep_dims: if true, keep the reduced dimensions with length 1

    r  r   r(   r   c                   d D             }t          ||dz            }|                     d          }t          	          5  	fd|j        D             }	                    ||          fdt          |j                  D             }
                    |i           }t          |t                    r	|j	        g}nd |D             } 	j
        |  d d d            d S # 1 swxY w Y   d S )Nc                &    g | ]}|j         j        S r-   rj   r   rA  ts     r    rC  z7reduce.<locals>.make_combine_region.<locals>.<listcomp>      6661666r"   r   r   c                :    g | ]}|                               S r-   r`  rA  ra  r   s     r    rC  z7reduce.<locals>.make_combine_region.<locals>.<listcomp>  %    NNN"288H--NNNr"   c                \    g | ](\  }}t                              |          |          )S r-   r=   argrA  r  ra  blocks      r    rC  z7reduce.<locals>.make_combine_region.<locals>.<listcomp>  1    [[[BF599Q<<,,[[[r"   r   c                    g | ]	}|j         
S r-   r  rA  rs     r    rC  z7reduce.<locals>.make_combine_region.<locals>.<listcomp>      55518555r"   )rW  
get_regionrM  rZ  create_block_with_parentr7  call_JitFunctionrU   r=   r  create_reduce_ret)	reduce_opin_scalar_tys	prototyperegionrZ  r   resultshandlesr]  r   r(   r  r  s           @r    make_combine_regionz#reduce.<locals>.make_combine_region  s`   66666!-1BCC	%%a((h'' 		1 		1NNNN	8MNNNK55fkJJE[[[[)IDY:Z:Z[[[D 11*d21NNG'6** 6">*55W555&H&00		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1   BC))C-0C-c                Z    t          j        |          D ]}t          | d          } | S )Nr   r  )builtinsranger8  )rT  ndims_r   s      r    expand_ndimszreduce.<locals>.expand_ndims  s7    && 	5 	5AAq8444AAr"   Nc              3  <   K   | ]}t          |           V  dS )r  N)r8  )rA  rT  r   rY  s     r    r  zreduce.<locals>.<genexpr>  s2      MMAAth???MMMMMMr"   c              3  `   K   | ](} |t          d          j                            V  )dS )r   N)r4   r<  )rA  rT  rv  r  s     r    r  zreduce.<locals>.<genexpr>  s=      JJQE!HN(;(;<<JJJJJJr"   )
rU   r=   r  ru  r  r4   r<  r   	reductiontuple)	r  rY  r  r  r   r(   ro  r=  rv  s	   ``` ``  @r    r  r    s9    %   uuizYQYfpqqqrstt1 1 1 1 1 1 1 1     
 t$$D#I..I$E!HN 3 344

UD*=x
H
HC KMMMMMMMMMMCCJJJJJcJJJJJCJr"   c                h    | j         j        }|t          u r|                     t          |          S | S r  )rj   r   bfloat16rG  re   )rT  r   	scalar_tys      r    r:  r:    s3    I HttGht///Hr"   c                   t          |          }| j        |         }t          d||          }t          | j                  dk    r\d t	          j        t          | j                            D             }||= t          |||          }t          || j        |          }t          | |f|||||          \  }	}
|	|
fS )Nr   r  r   c                ,    g | ]}t          |          S r-   r  )rA  r  s     r    rC  z(_reduce_with_indices.<locals>.<listcomp>  s    QQQ1)A,,QQQr"   rO  )	ru  r<  r  r4   rr  rs  r8  rJ  r  )r  rY  r  r  r   r(   nindexaxes_to_expandrvaluerindicess              r    _reduce_with_indicesr    s    t$$DDA1a(+++E
5;!QQs5;?O?O0P0PQQQ4 E>HEEEUEK(CCCuendJ)^f)35 5 5FH8r"   c                     d fd}|S )Nr#  r   r   c                B    d}|                               | _        | S )Nz
    Returns the {name} of all elements in the :code:`input` tensor along the provided :code:`axis`

    :param input: the input values
    :param axis: the dimension along which the scan should be doner/   rF  )r#  r%  r0   s     r    r&  z$_add_scan_docstr.<locals>._decorator  s'    F
 }}$}//r"   r'  r-   )r0   r&  s   ` r    _add_scan_docstrr    s)          r"   c                    t           t                    rt           f||          d         S  fd}t          |          }|(t	          |t           d         j                            }t          j         |||          S )a  Applies the combine_fn to each elements with a carry in :code:`input` tensors along the provided :code:`axis` and update the carry

    :param input: the input tensor, or tuple of tensors
    :param axis: the dimension along which the reduction should be done
    :param combine_fn: a function to combine two groups of scalar tensors (must be marked with @triton.jit)
    :param reverse: apply the associative scan in the reverse direction along axis.

    r  r   c                   d D             }t          ||dz            }|                     d          }t          	          5  	fd|j        D             }	                    ||          fdt          |j                  D             }
                    |i           }t          |t                    r	|j	        g}nd |D             } 	j
        |  d d d            d S # 1 swxY w Y   d S )Nc                &    g | ]}|j         j        S r-   rR  rS  s     r    rC  zAassociative_scan.<locals>.make_combine_region.<locals>.<listcomp>  rU  r"   r   r   c                :    g | ]}|                               S r-   r`  rW  s     r    rC  zAassociative_scan.<locals>.make_combine_region.<locals>.<listcomp>  rX  r"   c                \    g | ](\  }}t                              |          |          )S r-   rZ  r\  s      r    rC  zAassociative_scan.<locals>.make_combine_region.<locals>.<listcomp>!  r^  r"   r_  c                    g | ]	}|j         
S r-   ra  rb  s     r    rC  zAassociative_scan.<locals>.make_combine_region.<locals>.<listcomp>&  rd  r"   )rW  re  rM  rZ  rf  r7  rg  rU   r=   r  create_scan_ret)scan_oprj  rk  rl  rZ  r   rm  rn  r]  r   r(   r  r  s           @r    ro  z-associative_scan.<locals>.make_combine_region  s`   66666!-1BCC	##A&&h'' 		/ 		/NNNN	8MNNNK55fkJJE[[[[)IDY:Z:Z[[[D 11*d21NNG'6** 6">*55W555$H$g..		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/ 		/rp  )rU   r=   r  ru  r  r4   r<  r   )r  rY  r  r  r   r(   ro  s   ` ` `` r    r  r    s     %   s	4Wxdnooopqrr/ / / / / / / /  t$$D$E!HN 3 344$UD2EwPXYYYr"   c                L    t          |          }t          j        | ||          S )zcomputes an histogram based on input tensor with num_bins bins, the bins have a width of 1 and start at 0.

    :param input: the input tensor
    :param num_bins: number of histogram bins

    )ru  r   r  )r  r  r   r(   s       r    r  r  /  s&     #8,,HeXx888r"   c                *    t          j        |           S )zA
    Insert a barrier to synchronize all threads in a block.
    )r   debug_barrierr  s    r    r  r  A  s    
 !(+++r"   c           	     r   t          |t                    r|g}t          |          D ]o\  }}t          |t                    st          d| d          t          |j        t
                    s(t          d| dt          |j                   d          pd |D             }t          j        | |          S )zd
    Let the compiler know that the values in :code:`input` are all multiples of :code:`value`.
    values element r  r  rp  c                    g | ]	}|j         
S r-   r?  r  s     r    rC  zmultiple_of.<locals>.<listcomp>U      &&&!ag&&&r"   )	rU   rh   r7  rE   ri   rX   rj   r   multiple_ofr  r9   r   r  r  s        r    r  r  I  s    
 &)$$ &!! t t1!Y'' 	NLaLLLMMM!'3'' 	trarrbfghgnboborrrsss	t&&v&&&Fv...r"   c           	     r   t          |t                    r|g}t          |          D ]o\  }}t          |t                    st          d| d          t          |j        t
                    s(t          d| dt          |j                   d          pd |D             }t          j        | |          S )z^
    Let the compiler know that the `value` first values in :code:`input` are contiguous.
    r  r  r  rp  c                    g | ]	}|j         
S r-   r?  r  s     r    rC  z"max_contiguous.<locals>.<listcomp>e  r  r"   )	rU   rh   r7  rE   ri   rX   rj   r   max_contiguousr  s        r    r  r  Y  s    
 &)$$ &!! t t1!Y'' 	NLaLLLMMM!'3'' 	trarrbfghgnboborrrsss	t&&v&&&F"5&111r"   c           	     r   t          |t                    r|g}t          |          D ]o\  }}t          |t                    st          d| d          t          |j        t
                    s(t          d| dt          |j                   d          pd |D             }t          j        | |          S )z
    Let the compiler know that the `value` first values in :code:`input` are constant.

    e.g. if :code:`values` is [4], then each group of 4 values in :code:`input` should all be equal,
    for example [0, 0, 0, 0, 1, 1, 1, 1].
    r  r  r  rp  c                    g | ]	}|j         
S r-   r?  r  s     r    rC  z!max_constancy.<locals>.<listcomp>x  r  r"   )	rU   rh   r7  rE   ri   rX   rj   r   max_constancyr  s        r    r  r  i  s     &)$$ &!! t t1!Y'' 	NLaLLLMMM!'3'' 	trarrbfghgnboborrrsss	t&&v&&&F!%000r"    
)sepr  fileflushr   r  r  c                    dS )a{  
    Print the values at compile time.  The parameters are the same as the builtin :code:`print`.

    NOTE: Calling the Python builtin :code:`print` is not the same as calling this, it instead maps to :code:`device_print`,
    which has special requirements for the arguments.

    .. highlight:: python
    .. code-block:: python

        tl.static_print(f"{BLOCK_SIZE=}")
    Nr-   )r  r  r  r  r   r9   s         r    static_printr    s	     	Dr"   c                    dS )z
    Assert the condition at compile time.  Does not require that the :code:`TRITON_DEBUG` environment variable
    is set.

    .. highlight:: python
    .. code-block:: python

        tl.static_assert(BLOCK_SIZE == 1024)
    Nr-   )condmsgr   s      r    static_assertr    s	     	Dr"   )hexr   c               4   ddl }t          |           } t          | t                    sJ |  d            d}| D ]}||j        vrd} n|sJ |  d            g }|D ]%}|                    t          ||                     &t          j        | |||          S )a  
    Print the values at runtime from the device.  String formatting does not work for runtime values, so you should
    provide the values you want to print as arguments.  The first value must be a string, all following values must
    be scalars or tensors.

    Calling the Python builtin :code:`print` is the same as calling this function, and the requirements for the arguments will match
    this function (not the normal requirements for :code:`print`).

    .. highlight:: python
    .. code-block:: python

        tl.device_print("pid", pid)
        print("pid", pid)

    On CUDA, printfs are streamed through a buffer of limited size (on one host,
    we measured the default as 6912 KiB, but this may not be consistent across
    GPUs and CUDA versions).  If you notice some printfs are being dropped, you
    can increase the buffer size by calling

    .. highlight:: python
    .. code-block:: python

        triton.runtime.driver.active.utils.set_printf_fifo_size(size_bytes)

    CUDA may raise an error if you try to change this value after running a
    kernel that uses printfs.  The value set here may only affect the current
    device (so if you have multiple GPUs, you'd need to call it multiple times).

    :param prefix: a prefix to print before the values. This is required to be a string literal.
    :param args: the values to print. They can be any tensor or scalar.
    :param hex: print all values as hex instead of decimal
    r   Nz is not stringTFz is not an ascii string)	stringru  rU   r  	printableappendrN   r   device_print)	prefixr  r   r   r  b_asciichnew_argsr[  s	            r    r  r    s    D MMM ((Ffc""==v$=$=$====G  V%%%GE & 66v666666H 3 3
3112222 3AAAr"   c                   t          |          }ddl} |j                    } |j        |          }t	          |d          r'|j        } |j        |          }t	          |d          'd}d}d}|0|j        )|j        j        }|j        j        j        }|j        j	        }t          j        t          | |          |||||          S )a(  
    Assert the condition at runtime from the device.  Requires that the environment variable :code:`TRITON_DEBUG`
    is set to a value besides :code:`0` in order for this to have any effect.

    Using the Python :code:`assert` statement is the same as calling this function, except that the second argument
    must be provided and must be a string, e.g. :code:`assert pid == 0, "pid != 0"`.  The environment variable must
    be set for this :code:`assert` statement to have any effect.

    .. highlight:: python
    .. code-block:: python

        tl.device_assert(pid == 0)
        assert pid == 0, f"pid != 0"

    :param cond: the condition to assert. This is required to be a boolean tensor.
    :param msg: the message to print if the assertion fails. This is required to be a string literal.
    r   Nr7   unknown)ru  r2   currentframe	getmoduler   f_backf_codeco_nameco_filenamef_linenor   device_assertrN   )	r  r  r   r2   framemodulelineno	func_name	file_names	            r    r  r    s    & c
"
"CNNN G ""EWu%%F &*
%
% *""5)) &*
%
% * FIIU\5L(	L'3	 &!*T8"<"<c9iY_aijjjr"   asmconstraintsr   r   Union[dtype, Sequence[dtype]]is_purepackc                4   t          |           } t          |          }t          |          }t          |          }	 t          |           d}n# t          $ r d}|f}Y nw xY wt          j        t
          t                   |          }|}fd|D             x}	rvt          t          j	        ddd          }
|	d         |	D ]} |
|          \  }j
        r5t          |	          D ]\  }} |
|          \  |	|<   }fd|D             }d |	D             }                    | ||fd|D             ||          |s)t                              d          |d                   S t          fd	t          |          D                       S )
a  
        Execute inline assembly over a tensor.  Essentially, this is :code:`map`
        where the function is inline assembly.

        The input tensors :code:`args` are implicitly broadcasted to the same shape.

        :code:`dtype` can be a tuple of types, in which case the output is a
        tuple of tensors.

        Each invocation of the inline asm processes :code:`pack` elements at a
        time.  Exactly which set of inputs a block receives is unspecified.
        Input elements of size less than 4 bytes are packed into 4-byte
        registers.

        This op does not support empty :code:`dtype` -- the inline asm must
        return at least one tensor, even if you don't need it.  You can work
        around this by returning a dummy tensor of arbitrary type; it shouldn't
        cost you anything if you don't use it.

        Example using
        [PTX](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html)
        assembly:

        .. highlight:: python
        .. code-block:: python

        @triton.jit
        def kernel(A, B, C, D, BLOCK: tl.constexpr):
            a = tl.load(A + tl.arange(0, BLOCK)) # uint8 tensor
            b = tl.load(B + tl.arange(0, BLOCK)) # float32 tensor

            # For each (a,b) in zip(a,b), perform the following:
            # - Let ai be `a` converted to int32.
            # - Let af be `a` converted to float.
            # - Let m be the max of ai and b.
            # - Return ai and mi.
            # Do the above 4 elements at a time.
            (c, d) = tl.inline_asm_elementwise(
                asm="""
                {
                    // Unpack `a` into `ai`.
                    .reg .b8 tmp<4>;
                    mov.b32 {tmp0, tmp1, tmp2, tmp3}, $8;
                    cvt.u32.u8 $0, tmp0;
                    cvt.u32.u8 $1, tmp1;
                    cvt.u32.u8 $2, tmp2;
                    cvt.u32.u8 $3, tmp3;
                }
                // Convert `ai` to float.
                cvt.rn.f32.s32 $4, $0;
                cvt.rn.f32.s32 $5, $1;
                cvt.rn.f32.s32 $6, $2;
                cvt.rn.f32.s32 $7, $3;
                // Take max of `ai` and `b`.
                max.f32 $4, $4, $9;
                max.f32 $5, $5, $10;
                max.f32 $6, $6, $11;
                max.f32 $7, $7, $12;
                """,
                constraints=(
                    # 8 output registers, namely
                    #   $0=ai0, $1=ai1, $2=ai2, $3=ai3,
                    #   $4=m0,  $5=m1,  $6=m2,  $7=m3.
                    "=r,=r,=r,=r,=r,=r,=r,=r,"
                    # 5 input registers, namely
                    #   $8=ai,
                    #   $9=b0, $10=b1, $11=b2, $12=b3.
                    # The four elements from `a` are all packed into one register.
                    "r,r,r,r,r"),
                args=[a, b],
                dtype=(tl.int32, tl.float32),
                is_pure=True,
                pack=4,
            )
            tl.store(C + tl.arange(0, BLOCK), c)
            tl.store(D + tl.arange(0, BLOCK), d)

        :param asm: assembly to run.  Must match target's assembly format.
        :param constraints: asm constraints in
            [LLVM format](https://llvm.org/docs/LangRef.html#inline-asm-constraint-string)
        :param args: the input tensors, whose values are passed to the asm block
        :param dtype: the element type(s) of the returned tensor(s)
        :param is_pure: if true, the compiler assumes the asm block has no side-effects
        :param pack: the number of elements to be processed by one instance of inline assembly
        :param _builder: the builder
        :return: one tensor or a tuple of tensors of the given dtypes
    TFc                0    g | ]}t          |          S r-   rM   )rA  r[  r   s     r    rC  z*inline_asm_elementwise.<locals>.<listcomp>c	  s#    CCCsC22CCCr"   )rk   arithmetic_checkallow_lhs_ptrallow_rhs_ptrr   c                :    g | ]}t          |j                  S r-   )r;  r<  )rA  dtbroadcast_args     r    rC  z*inline_asm_elementwise.<locals>.<listcomp>s	  s&    KKKrz"m&9::KKKr"   c                    g | ]	}|j         
S r-   ra  rS  s     r    rC  z*inline_asm_elementwise.<locals>.<listcomp>t	  s    ///Aqx///r"   c                :    g | ]}|                               S r-   r`  rW  s     r    rC  z*inline_asm_elementwise.<locals>.<listcomp>u	  s&    AgAgAgY["((8BTBTAgAgAgr"   c              3  d   K   | ]*\  }}t                              |          |          V  +d S r,   )r=   
get_result)rA  r  ra  calls      r    r  z)inline_asm_elementwise.<locals>.<genexpr>y	  s=      PPEAr**B//PPPPPPr"   )ru  rD   rE   r  rF  r   _DtypeClassr   r   binary_op_type_checking_implr<  r7  create_inline_asmr=   r  rz  )r  r  r   rp   r  r  r   has_multiple_outputsres_tysdispatch_argsbin_op_type_checkingitemru  r  rn  r  r  s         `        @@r    inline_asm_elementwiser    s   t c
"
"C%k22Kt$$D!'**GU#   $	 K-u55EGCCCCdCCCC} L&1" 
  
  
 &a(! 	I 	ID33D-HHA}} 	L$]33 P P4&:&:4&O&O#a !!KKKKUKKKG/////G%%c;AgAgAgAg_fAgAgAgiprvwwD 6dooa(('!*555PPPPYw=O=OPPPPPPs   A A%$A%c                  &    e Zd ZdZddZd Zd ZdS )static_rangea  
    Iterator that counts upward forever.

    .. highlight:: python
    .. code-block:: python

        @triton.jit
        def kernel(...):
            for i in tl.static_range(10):
                ...
    :note: This is a special iterator used to implement similar semantics to Python's :code:`range` in the context of
        :code:`triton.jit` functions. In addition, it also guides the compiler to unroll the loop aggressively.
    :param arg1: the start value.
    :param arg2: the end value.
    :param step: the step value.
    Nc                &   t          |t                    sJ |t          d          | _        nt          |t                    sJ || _        |t          d          | _        || _        d S t          |t                    sJ || _        || _        d S Nr   r   )rU   rh   r;  r9  r  )r.   arg1arg2r;  s       r    r   zstatic_range.__init__	  s    $	*****<!!DIIdI.....DI<"1DJDHHHdI.....DJDHHHr"   c                     t          d          Nz8static_range can only be used in @triton.jit'd functionsr   r   s    r    r  zstatic_range.__iter__	      UVVVr"   c                     t          d          r  r   r   s    r    __next__zstatic_range.__next__	  r  r"   r  r7   r   r   r6   r   r  r  r-   r"   r    r  r  	  sX         "   W W WW W W W Wr"   r  c                  &    e Zd ZdZddZd Zd ZdS )rs  a  
    Iterator that counts upward forever.

    .. highlight:: python
    .. code-block:: python

        @triton.jit
        def kernel(...):
            for i in tl.range(10, num_stages=3):
                ...
    :note: This is a special iterator used to implement similar semantics to Python's :code:`range` in the context of
        :code:`triton.jit` functions. In addition, it allows user to pass extra attributes to the compiler.
    :param arg1: the start value.
    :param arg2: the end value.
    :param step: the step value.
    :param num_stages: pipeline the loop into this many stages (so there are
        :code:`num_stages` iterations of the loop in flight at once).

        Note this is subtly different than passing :code:`num_stages` as a
        kernel argument.  The kernel argument only pipelines loads that feed
        into :code:`dot` operations, while this attribute tries to pipeline most
        (though not all) loads in this loop.
    Nc                    |t          d          | _        n|| _        |t          d          | _        || _        n|| _        || _        || _        d S r  )rh   r;  r9  r  
num_stages)r.   r  r  r;  r  s        r    r   zrange.__init__	  sQ    <!!DIIDI<"1DJDHHDJDH$r"   c                     t          d          Nz4tl.range can only be used in @triton.jit'd functionsr   r   s    r    r  zrange.__iter__	      QRRRr"   c                     t          d          r  r   r   s    r    r  zrange.__next__	  r  r"   r  r  r-   r"   r    rs  rs  	  sX         0% % % %S S SS S S S Sr"   rs  lib_namelib_pathr8   arg_type_symbol_dictdict	ret_shaperz  c                ^   t          |          dk    rt          d          t          t          |                                          d                   }t          |          |k    r"t          dt          |           d|           g }	g }
|D ]}t	          |t
                    r5|	                    |j                   |
                    |j                   L|	                    t          |                     |
                    |           t          |	          }	|	|vr't          d|                                 d|	           ||	         d         }||	         d         }|rt          ||          }t           | ||||
|                    |          |          |          S )a  
        Dispatch a function to a library
        :param func: the function to dispatch
        :param lib_name: the name of the library
        :param lib_path: the path of the library
        :param args: the arguments of the function
        :param arg_type_symbol_dict: the type of the arguments
        :param ret_shape: the shape of the return value
        :param _builder: the builder
        :return: the return value of the function
    r   zarg_type_symbol_dict is emptyz+length of input args does not match.Expect z, got z,input arg type does not match.Expect one of r   )r4   r   r8   r5   rU   r=   r  rp   r  rj   rz  r;  r	  )r#  r  r  r   r  r  r  r   num_args	arg_typesarg_listr[  symbolrc  s                 r    dispatchr  	  s      A%%89994,113344Q788H
4yyH ?#&t99? ?4<? ? @ @ 	@ IH ! !c6"" 	!SY'''OOCJ''''T#YY'''OOC    i  I,,, Y*>*C*C*E*EY YMVY Y Z Z 	Z &i03'	215 	7!(I66Hdd8XvxPXAYAY[bccemnnnr"   c           
        |                                 }d}d}g }	t          j        t          |                    D ]\}
t	          ||
         |          ||
<   |	                    ||
         j                   ||
         j                                        rd}]t          |	          dk    rt          |	          }	d}|	|v rd}|d         }|D ]}t          j        ||||          \  }}t          j        t          |                    D ]&}
t          j        ||
         |||          \  ||
<   }'|s|j        }|j        }t          || ||||||          S )a  
        Dispatch an elementwise function to a library
        :param lib_name: the name of the library
        :param lib_path: the path of the library
        :param args: the arguments of the function
        :param arg_type_symbol_dict: the type of the arguments
        :param is_pure: whether the function is pure
        :param _builder: the builder
        :return: the return value of the function
    TNFr   )r  )copyrr  rs  r4   rN   r  rp   rj   r   rz  r   r  r<  create_extern_elementwiser  )r  r  r   r  r  r   r  
all_scalarr  r  r  r  r  r  ru  r#  s                   r    extern_elementwiser  
  s    IIKKMJII^C..//  %mA&6AAaq)/000 ))++ 	J
9~~)$$	,,,$%a(! 	h 	hD'DT=ZbVf h  h  hA}} M 2 233 	k 	kA"*"GVWHXZgiqYi#k #k #kM!aa 	,%+I-DD(Hm=QS\^egopppr"   c                .    t          j        | ||          S )z
        Convert both operands to a single common type
        :param lhs: the left operand
        :param rhs: the right operand
        :param builder: the builder
    )r   r  )lhsrhsrk   s      r    binary_op_type_legalizationr  1
  s     0c7CCCr"   c                     t          |           S )z#A decorator for external functions.)r&   rK   s    r    externr  ;
  s    2;;r"   )r   r   r   r   r8  r,   )rf  rX   rg  rH   r   rp   )r  r=   )FNr  r  r  r  )NNr-   r)   r)   r)   FN)Nr-   r)   r)   N)r  r=   r  )r0   r  r   rH   r   r!  r  )NNNN)r8  rh   )r0   r  rB  r  rC  r  r   r!  )FNN)r0   r  r   r!  )r  r  r  r  )r)   N)r  r  r  r  r   r   rp   r  r  rH   r  rX   )r  r  r  r  r   r8   r  r  r  rz  r  rH   )
r  r  r  r  r   r8   r  r  r  rH   )
__future__r   warningsr   
contextlibr   enumr   	functoolsr   r   r  r	   r
   r   r   r   r   rr  runtime.jitr   r2   r  _C.libtritonr   r)   r   r   rH  r%   PROPAGATE_NANPropagateNanr&   rB   rG   r<   rO   rN   rp   r  r  r0  r;  rW  r~   rW   rq   rr   rZ   r^   rs   rt   r\   r`   float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15float16r|  re   rg   pi32_tri  rk  rh   r  r  r=   r  ru  r  r  r  r  r  r  rJ  rO  rQ  r  r  r  r   rU  rW  r  r8  rF  r  r  r  r  r`  r  rc  r(  ri  rl  rn  rp  rr  rt  rv  rx  r5  NONEr;  r?  rA  rH  rM  r  r:  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rs  r  r  r  r  r-   r"   r    <module>r     s   " " " " " "       % % % % % %       $ $ $ $ $ $ $ $  E E E E E E E E E E E E E E E E         				            GCLL! %    ) ) ) )X  ,. . . .
 	# # # 	#D D DDc8 c8 c8 c8 c8 c8 c8 c8R          5      Fa a a a a a a a"0 0 0 0 0 0 0 0fB B B B BE B B B  uV}}uV}}uV}}gggg	x	x	x5>>eJU9
U9
eJ
%--5==
%--
%--	e		U U U U8	 	 	 	 	 	 	 	B) B) B) B) B) B) B) B)J ill  Z Z Z Z Z Z Z Zz0 0 0   	/ / / 	/$ 	1 1 1 	1  	1 1 1 	1"3 3 3 	8 8 8 	8* 		A 	A 	A 		A )- A A A A 	 A& )- 3 3 3 3 3 	 32 #' 3 3 3 3 	 3, 	= = = 	= 	) ) ) 	).        	 B !% N N N N 	 N( ',t A A A A 	 A&. . .    	 8 G G G G 	 G4 	]amth h h 	hJ 	rt"&7- 7- 7- 	7-t 	S S S 	S 	M M M 	M ,k ,k ,k 	 ,k^ 	` ` ` ` 	` 5 5 5 	 5    < &555H H H 65 	 H JJ J J   	 J EI I I  	 I EI I I  	 I EI I I  	 I M""I I I #" 	 I L!!H H H "! 	 H M""I I I #" 	 I 	5 5 5 	56 	-9-> ; ; ; ; 	;* 	-9-> ; ; ; ; 	;* 	2>2Cd @ @ @ @ 	@D    , ( ( ( + + + 	 +\ 	   	 	   	,    Z Z Z 	 ZD 9 9 9 	 9  	, , , 	, 	/ / / 	/ 	2 2 2 	2 	1 1 1 	1. 	%(TE\` 	 	 	 	 	 		 	
	 
	 
	 	
	 	$)D -B -B -B -B 	-B` 	%k %k %k 	%kP 	>BQ Q Q Q 	QN%W %W %W %W %W %W %W %WP*S *S *S *S *S *S *S *Sf &*(o (o (o (o (oV 	 $'q 'q 'q 'q 	'qTD D D    r"   