
    çg                   L   d dl mZ d dlmZmZmZmZmZ ddlm	Z	 ddl
mZ ddl
mZ  ed          Z G d	 d
e          ZddZddZddZddZddZ	 	 	 ddd$Zdd'Zdd(Zdd)Zdd*Zdd+Zdd-Zdd.Zdd3Zdd4Zdd7Z dd8Z!dd9Z"dd:Z#dd;Z$dd<Z%dd=Z&dd>Z'dd?Z(dd@Z)ddAZ*ddBZ+ddCZ,ddDZ-ddGZ.ddHZ/ddIZ0ddJZ1ddKZ2ddLZ3ddMZ4ddPZ5ddTZ6ddVZ7ddYZ8ddZZ9dd[Z:dd^Z;dd_Z<ddbZ=ddcZ>dddZ?ddgZ@ddiZA	 dddlZBdm ZCdn ZDdo ZEdp ZFdq ZGdr ZHds ZIdt ZJdu ZKddZLddZMddZNd ZOd ZPddZQddZRddZSddZTddZUddZVddZWddZXddZYddZZd Z[ddZ\ddZ]d Z^ddZ_ddZ`ddZaddZbddZcddZdddZeddZfddZgd ZhddZiddZjddZkdjS )    )annotations)ListOptionalSequenceTupleTypeVar   )ir   )core)mathTc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                                         z   dz   | j                                        z   | _        t	          t
          |                               | j                   d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      T/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sl    2T[5I5I5K5KKgUX\XcXlXlXnXnn'..77EEEEE    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      sA        F F F F F F F F Fr   r   axisintbuilder
ir.builderreturn	tl.tensorc                    | dvrt          d|            t          j        |                    |           t          j                  S )Nr   r   r	   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32r!   r#   s     r   
program_idr/      sF    9MtMMNNN9W22488"(CCCr   c                    | dvrt          d|            t          j        |                    |           t          j                  S )Nr(   z-num_programs axis must be 0, 1, or 2 but got )r)   r*   r+   create_get_num_programsr-   r.   s     r   num_programsr2       sF    9OOOPPP9W44T::BHEEEr   a_tytl.dtypeb_tyc                   | j         }|j         }| j        }|j        }||k    r
||k    r| n|S |t          j        j        j        k    r
||k    r| n|S |t          j        j        j        k    r
||k    r|n| S t          d| d|           )Nzunexpected signedness r   )int_bitwidthint_signednessr*   dtype
SIGNEDNESSUNSIGNED	TypeError)r3   r5   a_rankb_ranka_snb_sns         r   integer_promote_implrA   +   s    FFDD t||ttD0	$-	-	-''ttT1	$-	-	-''ttT1
>T>>>>
?
??r   
div_or_modboolc                ~   |                                  s|                                 rt          j        S |                                 s|                                rt          j        S |                                 s|                                r|rt          j        S t          j        S |                                 s|                                rN|rt          j        S |                                 r |                                rt          j        S t          j        S | 	                                r|	                                st          d|  d|           |rO| j        |j        k    r?t          d|                                 z   dz   |                                z   dz             t          | |          S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)is_fp64r*   float64is_fp32float32is_fp16float16is_bf16bfloat16is_intr<   r8   r   rA   )r3   r5   rB   s      r   computation_type_implrP   ;   s    ||~~  z ||~~  z ||~~   	::||~~   	:<<>> 	dllnn 	;z;;== > ><4<<d<<===  ld)T-@@@5G'QTXTaTaTcTcckk l l 	l  d+++r   r   r   allow_ptr_aNonec                    |                                  r`|st          | |          |                                 r| |k    rt          | |          |                                rt          | |          d S d S N)is_ptrr   is_floating)r   r   rQ   s      r   check_ptr_type_implrW   c   s    }} < 	<+FF;;;==?? 	<& 0 0+FF;;; 	<+FF;;;< <	< 	<r   FTlhsrhsTuple[tl.tensor, tl.tensor]c                `   t          | ||          \  } }| j        j        }|j        j        }t          |||           t          |||           |r[|                                sG|                                s3t          |||          }	t          | |	|          } t          ||	|          }| |fS rT   )broadcast_impl_valuetypescalarrW   rU   rP   cast)
rX   rY   r#   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrB   
lhs_sca_ty
rhs_sca_ty
ret_sca_tys
             r   binary_op_type_checking_implrf   o   s     $Cg66HCJJ
J>>>
J>>> -
 1 1 3 3 -J<M<M<O<O -*:z:NN
3
G,,3
G,,8Or   inputotherc                @   t          | ||dd          \  } }| j        j        }|j        j        }|                                r#|                                rt	          d          |                                r0|                                s|| }} | j        j        }|j        j        }|                                r8t          j        |                    | j        |j                  | j                  S |	                                r8t          j        |
                    | j        |j                  | j                  S |                                r8t          j        |                    | j        |j                  | j                  S t	          d|           )NTzcannot add pointers togetherrE   )rf   r]   r^   rU   r<   r*   r+   create_addptrhandlerV   create_faddrO   
create_addrg   rh   r#   input_scalar_tyother_scalar_tys        r   addrq      sy   /ugtTRRLE5j'Oj'O 8O$:$:$<$< 86777  ,(>(>(@(@ ,eu*+*+ Uy..u|U\JJEJWWW		$	$	&	& Uy,,U\5<HH%*UUU				!	! Uy++EL%,GGTTT
888
9
99r   c           	     N   t          | ||dd          \  } }| j        j        }|                                rFt	          j        |                    | j        t          ||          j                  | j                  S |	                                r8t	          j        |
                    | j        |j                  | j                  S |                                r8t	          j        |                    | j        |j                  | j                  S t          d|           )NTFrE   )rf   r]   r^   rU   r*   r+   rj   rk   minusrV   create_fsubrO   
create_subr<   rg   rh   r#   	scalar_tys       r   subrx      s    /ugtUSSLE5
!I hy..u|U5'=R=R=YZZ\a\fggg Uy,,U\5<HH%*UUU					 Uy++EL%,GGTTT
2y22
3
33r   c                   t          | ||          \  } }| j        j        }|                                r8t	          j        |                    | j        |j                  | j                  S |                                r8t	          j        |	                    | j        |j                  | j                  S t          d|           NrE   )rf   r]   r^   rV   r*   r+   create_fmulrk   rO   
create_mulr<   rv   s       r   mulr}      s    /ugFFLE5
!I Uy,,U\5<HH%*UUU					 Uy++EL%,GGTTT
2y22
3
33r   c           	     V   t          | ||dddd          \  } }| j        j        }|j        j        }|                                r'|                                rt          |||          }n|                                r&|                                rt          | ||          } n|                                rK|                                r7t          | t          j        |          } t          |t          j        |          }nn|                                rH|                                r4|j        |j        k    rt          |||          }n$t          | ||          } nt          d|           t          j
        |                    | j        |j                  | j                  S NFTrE   )rf   r]   r^   rV   rO   r_   r*   rJ   fp_mantissa_widthr<   r+   create_fdivrk   rn   s        r   truedivr      s   /ugueUY[_``LE5j'Oj'O""$$ >)?)?)A)A >UOW55				!	! >o&A&A&C&C >UOW55				!	! >o&<&<&>&> >UBJ00UBJ00		$	$	&	& >?+F+F+H+H >,/PPP99EE99EE <?<<===9W((u|DDejQQQr   c           	     B   t          | ||dddd          \  } }| j        j        }|j        j        }|                                r|                                rt	          ||          }t          | ||          } t          |||          }|                                r8t          j        |	                    | j
        |j
                  | j                  S t          j        |                    | j
        |j
                  | j                  S t          d|           r   )rf   r]   r^   rO   rA   r_   is_int_signedr*   r+   create_sdivrk   create_udivr<   )rg   rh   r#   ro   rp   ret_tys         r   floordivr      s   /ugueUY[_``LE5j'Oj'O ZO$:$:$<$< Z%oGGUFG,,UFG,,!! 	Z9W00u|LLejYYY9W00u|LLejYYY
888
9
99r   ieee_roundingc           	     D   | j         j        }|j         j        }|                                r|                                st          d          t	          | ||dddd          \  } }|                    | j        |j                  }t          j        || j                   S )Nz4both operands of fdiv must have floating scalar typeFT)	r]   r^   rV   r<   rf   r   rk   r*   r+   )rg   rh   r   r#   ro   rp   rets          r   fdivr      s    j'Oj'O&&(( P0K0K0M0M PNOOO/ugueUZ\`aaLE5


elEL
9
9C9S%*%%%r   c                   t          | ||dddd          \  } }| j        j        }|j        j        }|                                rFt	          | t          t          j        t          | |d|          |          ||          |          }|S |	                                r|j
        |j
        k    r?t          d|                                z   dz   |                                z   dz             |                                r8t          j        |                    | j        |j                  | j                  S t          j        |                    | j        |j                  | j                  S t          d|           )NFT_builderzCannot mod z by rF   rE   )rf   r]   r^   rV   rx   r}   r   floorr   rO   r8   r<   r   r   r*   r+   create_sremrk   create_urem)rg   rh   r#   rw   rp   r   s         r   modr      s|   /ugueUY[_``LE5
!Ij'O Z%TZUE5'(J(JU\]]]_dfmnnpwxx
					 Z#'EEEMI,>,>,@,@@6IOLdLdLfLff jo o p p p ""$$ 	Z9W00u|LLejYYY9W00u|LLejYYY
2y22
3
33r   xypropagate_nantl.PropagateNanc                   t          | ||          \  } }| j        }|                                r|t          j        j        k    r8t          j        |                    | j        |j                  | j	                  S |t          j        j
        k    r8t          j        |                    | j        |j                  | j	                  S t          d|           |                                r8t          j        |                    | j        |j                  | j	                  S |                                r8t          j        |                    | j        |j                  | j	                  S t#          d|           NzUnexpected propagate_nan Unexpected dtype )rf   r9   rV   r*   PropagateNanALLr+   create_minimumfrk   r]   NONEcreate_minnumfr)   r   create_minsiis_int_unsignedcreate_minuir<   r   r   r   r#   r9   s        r   minimumr     H   '1g66DAqGE 5BO///9W44QXqxHH!&QQQbo2229W33AHahGGPPPHHHIII					 5y--ahAA16JJJ				 	  5y--ahAA16JJJ3E33444r   c                   t          | ||          \  } }| j        }|                                r|t          j        j        k    r8t          j        |                    | j        |j                  | j	                  S |t          j        j
        k    r8t          j        |                    | j        |j                  | j	                  S t          d|           |                                r8t          j        |                    | j        |j                  | j	                  S |                                r8t          j        |                    | j        |j                  | j	                  S t#          d|           r   )rf   r9   rV   r*   r   r   r+   create_maximumfrk   r]   r   create_maxnumfr)   r   create_maxsir   create_maxuir<   r   s        r   maximumr     r   r   minmaxc                T   t          |||          \  }}t          | ||          \  } }t          | ||          \  } }| j        }|                                r?t          j        |                    | j        |j        |j        |          | j                  S t          d| d          )Nr   z(. Only floating point clamp is supported)	rf   r9   rV   r*   r+   create_clampfrk   r]   r<   )r   r   r   r   r#   r9   s         r   clampr   %  s    +Cg>>HC)!S'::FAs)!S'::FAsGE ]y..qxSZQ^__abaghhh[E[[[\\\r   c                T   t          | ||ddd          \  } }| j        j        }|j        j        }|                                r|                                st	          ||          t          ||          }||k    rt          | ||          } ||k    rt          |||          }| |fS )NF)rf   r]   r^   rO   r   rA   r_   )rg   rh   r#   input_sca_tyother_sca_tyre   s         r   bitwise_op_type_checking_implr   6  s    /ugueUZ[[LE5:$L:$L   D(;(;(=(= D'lCCC%lLAAJ\!!UJ00\!!UJ00%<r   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   
create_andrk   r]   rg   rh   r#   s      r   and_r   E  ?    0wGGLE59W''elCCUZPPPr   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   	create_orrk   r]   r   s      r   or_r   J  s?    0wGGLE59W&&u|U\BBEJOOOr   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   
create_xorrk   r]   r   s      r   xor_r   O  r   r   c                   | j                                         s#t          | t          j        d          |          } |j                                         s#t          |t          j        d          |          }t          | ||          S Nint1)r]   is_int1bitcastr*   r9   r   r   s      r   logical_andr   T  sv    : :rx//99: :rx//99ug&&&r   c                   | j                                         s#t          | t          j        d          |          } |j                                         s#t          |t          j        d          |          }t          | ||          S r   )r]   r   r   r*   r9   r   r   s      r   
logical_orr   \  sv    : :rx//99: :rx//99ueW%%%r   c                    | j                                         s#t          | t          j        d          |          } t          | |          S r   )r]   r   r   r*   r9   invert)rg   r#   s     r   not_r   d  sC    : :rx//99%!!!r   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   create_lshrrk   r]   r   s      r   lshrr   j  ?    0wGGLE59W((u|DDejQQQr   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   create_ashrrk   r]   r   s      r   ashrr   o  r   r   c                    t          | ||          \  } }t          j        |                    | j        |j                  | j                  S rT   )r   r*   r+   
create_shlrk   r]   r   s      r   shlr   t  r   r   c                    | S rT    )rg   s    r   plusr   ~  s    Lr   c                (   | j         j        }|                                r't          d|                                z   dz             t          j        |                    |                    |                    |          }t          || |          S )Nz$wrong type argument to unary minus ())
r]   r^   rU   r)   r   r*   r+   get_null_valueto_irrx   )rg   r#   r   _0s       r   rs   rs     s    :$L a?,BWBWBYBYY\__```	7)),*<*<W*E*EFF	U	UBr5'"""r   c                P   | j         j        }|                                s|                                r't	          d|                                z   dz             t          j        |                    |	                    |                    |          }t          | ||          S )Nz%wrong type argument to unary invert (r   )r]   r^   rU   rV   r)   r   r*   r+   get_all_ones_valuer   r   )rg   r#   r   _1s       r   r   r     s    :$L b 8 8 : : b@<CXCXCZCZZ]``aaa	7--l.@.@.I.IJJL	Y	YBr7###r   vtl.block_typec                    | j                                         st          j        S | j         j        }t          j        t          j        |          S rT   )r]   is_blockr*   r   shape
block_type)r   r   s     r   
_bool_liker     s:    6?? wFLE=%(((r   c                ^   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r|
                                r@t	          j        |                    | j        |j                  t          |                     S t	          j        |                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpOGTrk   r   rO   r   create_icmpSGTcreate_icmpUGTr<   rv   s       r   greater_thanr        /ugFFLE5
!I dy//elKKZX]M^M^___					 d""$$ 	d9W33EL%,OOQ[\aQbQbccc9W33EL%,OOQ[\aQbQbccc
2y22
3
33r   c                ^   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r|
                                r@t	          j        |                    | j        |j                  t          |                     S t	          j        |                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpOGErk   r   rO   r   create_icmpSGEcreate_icmpUGEr<   rv   s       r   greater_equalr     r   r   c                ^   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r|
                                r@t	          j        |                    | j        |j                  t          |                     S t	          j        |                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpOLTrk   r   rO   r   create_icmpSLTcreate_icmpULTr<   rv   s       r   	less_thanr     r   r   c                ^   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r|
                                r@t	          j        |                    | j        |j                  t          |                     S t	          j        |                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpOLErk   r   rO   r   create_icmpSLEcreate_icmpULEr<   rv   s       r   
less_equalr     r   r   c                   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r@t	          j        |
                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpOEQrk   r   rO   create_icmpEQr<   rv   s       r   equalr         /ugFFLE5
!I _y//elKKZX]M^M^___					 _y..u|U\JJJW\L]L]^^^
2y22
3
33r   c                   t          | ||          \  } }| j        j        }|                                r@t	          j        |                    | j        |j                  t          |                     S |	                                r@t	          j        |
                    | j        |j                  t          |                     S t          d|           rz   )rf   r]   r^   rV   r*   r+   create_fcmpUNErk   r   rO   create_icmpNEr<   rv   s       r   	not_equalr    r   r   startendc                   t          | t                    rt          |t                    st          d          t          | dz	            }t          |dz	            }|s|rt          d          || k    rt          d          || z
  }||dz
  z  dk    rt          d          |g}t	          j        t          j        |          }t	          j        |                    | |          |          S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)	
isinstancer"   r)   rC   r*   r   r-   r+   create_make_range)r  r  r#   is_start_int64is_end_int64ranger   r   s           r   aranger    s    eS!! LC)=)= LJKKK%2+&&Nr	??L 5 53444
e||XYYY%KE!!>???GE]28U++F9W..uc::FCCCr   r   	List[int]r9   c                   t          |t          j                  r,|j        j        dk    s
J d            t          |||          }nx|t          d          |dk    r)|                    |                    |                    }n#t          |d|j
                   } ||          }t          j        ||          }t          || |          S )Nr   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)r	  r*   r+   numelvaluer_   r)   r   r   getattrnamesplat)r   r  r9   r#   get_value_fns        r   fullr    s    %## ({ A%%%'C%%%UE7++ =QRRRA::**5;;w+?+?@@EE"7,?5:,?,?@@L L''E	%''w'''r   r  c                   | j                                         r
J d            t          |          dk    r| S t          j        | j        |          }t          j        |                    | j        |          |          S )NzCannot splat a block tensorr   )	r]   r   lenr*   r   r9   r+   create_splatrk   )r  r   r#   r   s       r   r  r    sp    z""$$CC&CCCC
5zzQ]5;..F9W))%,>>GGGr   	dst_shapecan_reorderc                    d}|D ]}||z  }| j         j        |k    rt          d          t          j        | j         j        |          }t          j        |                    | j        ||          |          S )Nr   z:reshape() cannot change total number of elements in tensor)	r]   r  r)   r*   r   r^   r+   create_reshaperk   )rg   r  r  r#   r  sr   s          r   reshaper!  %  s}    E  
z5  UVVV]5:,i88F9W++EL)[QQSYZZZr   c                @   d | j         D             }|                    |d           | j                                        st	          | ||          S t          j        | j        j        |          }t          j        |	                    | j
        |          |          S )Nc                6    g | ]}t          j        |          S r   r*   _constexpr_to_value).0r   s     r   
<listcomp>zexpand_dims.<locals>.<listcomp>0  s#    @@@q'**@@@r   r   )r   r#   )r   insertr]   r   r  r*   r   r^   r+   create_expand_dimsrk   )rg   r!   r#   r  r   s        r   expand_dimsr*  /  s    @@EK@@@IT1:   >U)W====]5:,i88F9W//dCCVLLLr   c                &   |s
J d            t          | j                  dk    sJ t          j        | j        j        | j        d         |j        d         z   g          }t          j        |                    | j        |j                  |          S )Nz;current implementation of `cat` always may reorder elementsr   r   )	r  r   r*   r   r]   r^   r+   
create_catrk   )rX   rY   r  r#   ret_types        r   catr.  :  s    UUUUUUsy>>Q}SX_sy|cil/J.KLLH9W''
CJ??JJJr   abc                   t          | ||          \  } }| j        g k    }|r"t          | d|          } t          |d|          }t          | j        d         t          j                  rt	          j        d          }nd}| j        |gz   }t	          j        | j        j        |          }t	          j	        |
                    | j        |j                  |          }|rt          |dgd|          }|S )Nr   r	   Fr  r#   )r\   r   r*  r	  r*   	constexprr   r]   r^   r+   create_joinrk   r!  )r/  r0  r#   
was_rank_1two	new_shaper-  r   s           r   joinr9  A  s    1g..DAq BJ '1g&&1g&&!'"+r|,, l1oo3%I}QV]I66H
)G''!(;;X
F
FC DcA3E7CCCJr   c                j   t          | j                  dk    sJ t          j        | j        d                   dk    sJ | j        d d         }t          j        | j        j        |          }|                    | j                  \  }}t          j	        ||          t          j	        ||          fS )Nr   r2  r	   )
r  r   r*   r%  r   r]   r^   create_splitrk   r+   )r/  r#   r8  r-  outLHSoutRHSs         r   splitr>  Z  s    LL1"172;//14444I}QV]I66H))!(33NFF
	&(##
	&(## r   dims
Tuple[int]c                    t           j                  t          |          k    rt          d          t          d |D                       t	          t          t          |                              k    rt          d|           t          j         j        j	         fd|D                       }t          j
        |                     j        |          |          S )Nz5permute dims must have the same length as input shapec              3  >   K   | ]}t          j        |          V  d S rT   r$  )r&  ds     r   	<genexpr>zpermute.<locals>.<genexpr>j  s-      66Ab$Q''666666r   z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                *    g | ]}j         |         S r   )r   )r&  rC  rg   s     r   r'  zpermute.<locals>.<listcomp>m  s    0N0N0NAQ0N0N0Nr   )r  r   r)   sortedlistr  r*   r   r]   r^   r+   create_transrk   )rg   r?  r#   r-  s   `   r   permuterI  g  s    
5;3t99$$PQQQ6666666$uSYY?O?O:P:PPPa[_aabbb}UZ.0N0N0N0N0N0N0NOOH9W))%,==xHHHr   c                   | j                                         sHt          j        | j         |          }t          j        |                    | j        |          |          S | j                                         }t          |          t          |          k    rt          d| d|           ||k    r| S t          |          D ];\  }}||         |k    r*|dk    r$t          d||          d| d| d| d| 
          <t          j        | j         j        |          }t          j        |                    | j        |          |          S )Nz!Cannot broadcast, rank mismatch: , r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )r]   r   r*   r   r+   r  rk   get_block_shapesr  r)   	enumerater^   create_broadcast)rg   r   r#   r   	src_shapeiitems          r   broadcast_impl_shaperS  q  sv   :   Luz511y--elEBBFKKK
++--I
9~~U##QYQQ%QQRRR	Y'' < <48t		 ;SXYZS[ ; ;?C; ;!"; ;&/; ;38; ; < < < ]5:,e44F9W--elEBBFKKKr   c           	     v   | j         }|j         }|                                ru|                                sat          j        |j        |j                  }t          j        |                    |j        |	                                          |          }n|                                su|                                rat          j        |j        |j                  }t          j        |                    | j        |	                                          |          } n|                                r|                                r|	                                }|	                                }t          |          t          |          k     rt          t          |          t          |                    D ]g}t          j        |                    | j        d          t          j        |j        dg|z                       } | j         }|	                                }hnt          |          t          |          k     rt          t          |          t          |                    D ]g}t          j        |                    |j        d          t          j        |j        dg|z                       }|j         }|	                                }ht          |          t          |          k    sJ g }t          |          D ]\  }	}
||	         }|
dk    r|                    |           )|dk    s||
k    r|                    |
           Kt          dt!          |	          z   dz   t!          |
          z   dz   t!          |          z             ||k    rHt          j        |j        |          }t          j        |                    | j        |          |          } ||k    rHt          j        |j        |          }t          j        |                    |j        |          |          }| |fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rL  r   )r]   r   r*   r   r^   r   r+   r  rk   rM  r  r  r)  rN  appendr)   strrO  )rX   rY   r#   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperQ  leftrightr   s                r   r\   r\     s   XFXF  +U!2!2 +Uv}fl;;i,,SZ9P9P9R9RSSU[\\__ 'U6??#4#4 'Uv}fl;;i,,SZ9P9P9R9RSSU[\\			 #Uv00 #U++--	++--	y>>C	NN**3y>>3y>>:: 6 6i : :3:q I I "fmaS9_ M MO O"3355			6
 ^^c)nn,,3y>>3y>>:: 6 6i : :3:q I I "fmaS9_ M MO O"3355		9~~Y////	 ++ 	a 	aGAtaLEqyy  ''''1**%4--  &&&&  "-/21vv"68<"=?B4yy"IKR"SUXY^U_U_"` a a a	!!]6=)<<F)G44SZKKVTTC	!!]6=)<<F)G44SZKKVTTC8Or   rounding_modeOptional[str]c                    | d S | dk    rt           j        j        S | dk    rt           j        j        S t	          d|  d          )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r
   ROUNDING_MODERTNERTZr)   )r_  s    r   _str_to_rounding_moderg    sU    t$$##
n}nnn
o
oor   dst_tyc                F   | j         }|                                r1t          j        |j        | j                                                   }||k    r| S |j        }|j        }|                                s|                                rt          | ||          S |j        }|j        }||k    r2t          dt          |          z   dz   t          |          z             t          j        |                    | j        |                    |                    |          S )Nz!Cannot bitcast data-type of size z to data-type of size )r]   r   r*   r   r^   rM  rU   r_   primitive_bitwidthr)   rV  r+   create_bitcastrk   r   )rg   rh  r#   src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   r     s   ZF Mv}ej.I.I.K.KLLJJ ,j//11 ,E67+++,H,H8<s8}}L P. .03H> ? ? 	?9W++EL&,,w:O:OPPRXYYYr   Nfp_downcast_roundingc                   | j         }t          |t          j                  r|j        }t          |t          j                  r|j        }|                                r1t          j        |j        | j                                                   }||k    r| S |j        }|j        }t          |          }d}|
                                rP|
                                r<|j        |j        k     r,|t          j        j        }nL|t          j        j        k    rd}n4|2t          dt!          |          z   dz   t!          |          z             |                                s|                                r|j        j        s
J d            |                                s|                                r?|j                            d          	 
J d             |j        d         | |||          S |                                r|
                                s*|
                                r|                                s|rBt          j        |                    | j        |                    |          |          |          S |                                r|                                r(|                                r>|                                s*t?          t?          | t          j         |          ||          S |
                                o#|
                                o|j        |j        k    }|rAt          j        |!                    | j        |                    |                    |          S |
                                o#|
                                o|j        |j        k     }	|	rAt          j        |"                    | j        |                    |                    |          S |#                                r|#                                r|j$        |j$        k    s|j%        |j%        k    r|&                                o|'                                 }
|'                                rX| j(                            |          }t          j        |)                    |          | j(                  }tU          | ||          S t          j        |+                    | j        |                    |          |
          |          S |,                                r|#                                r|'                                rX| j(                            |          }t          j        |)                    |          | j(                  }tU          | ||          S |&                                rAt          j        |-                    | j        |                    |                    |          S t          j        |.                    | j        |                    |                    |          S |#                                r|,                                r|'                                s|&                                sAt          j        |/                    | j        |                    |                    |          S t          j        |0                    | j        |                    |                    |          S |1                                r|#                                r|j$        }|d	k    rAt          j        |2                    | j        |                    |                    |          S |d
k    rZtU          t?          | t          j3        |          t          j        |4                    d          t          j3                  |          S |#                                rU|1                                rAt          j        |5                    | j        |                    |                    |          S |1                                rU|1                                rAt          j        |6                    | j        |                    |                    |          S J d|  d|             )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is z4fp8e4nv data type is not supported on CUDA arch < 89convert_custom_typesz0target doesn't provide conversion for this type.r   @   r   r   zcannot cast z to )7r]   r	  r*   r4  r  r   r   r^   rM  rg  rV   rj  r
   rd  re  r)   rV  
is_fp8e4nvoptionsallow_fp8e4nvis_fp8e4b15codegen_fnsgetis_fp8r+   create_fp_to_fprk   r   rK   rI   rM   r_   rJ   create_fp_trunccreate_fp_extrO   r7   r8   r   is_boolr9   r   r  create_int_castis_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprU   create_ptr_to_intint64	get_int64create_int_to_ptrrk  )rg   rh  r#   rq  rl  rm  rn  use_custom_roundingtruncate_fpext_fpsign_extendtyr   bitwidths                 r   r_   r_     ss   ZF&",'' &55 :39 Mv}ej.I.I.K.KLLJJ 11EFF yJ$:$: % % y

'**G
G
G'@P@U)=)=!R%5%:::RV<O+ 68;JHJefhklvhwhwx y y y 	 e:#8#8#:#: e,dd.dddd   rJ$:$:$<$< r"&&"$ $+/0 01c0 0 0:w"#9:5&J^ipqqqq 	 u
 6 6 8 8 u  u%/%6%6%8%8uu y00v||G?T?TVjkkmsttt 	 KZ%7%7%9%9 KK%/%7%7%9%9KD
G44j'JJJ
 ((** F  F%
(EE   _y00v||G?T?TUUW]^^^ ##%% F  F%
(EE   ]y..u|V\\'=R=RSSU[\\\  pz0022 p:#:::j>W[e[t>t>t ..00M9K9K9M9M5M 	p""7++B711"55u{CCBUB0009W44U\6<<PWCXCXZeffhnooo &&(( cZ->->-@-@ c 	c""7++B711"55u{CCBUB000%%'' 	c9W44U\6<<PWCXCXYY[abbb9W44U\6<<PWCXCXYY[abbb  cz>>@@ c 	cz'?'?'A'A 	c9W44U\6<<PWCXCXYY[abbb9W44U\6<<PWCXCXYY[abbb  qz0022 q*r>>9W66u|V\\RYEZEZ[[]cdddq==T%7;;RYwGXGXYZG[G[]_]e=f=fhoppp  az0022 ay225<gAVAVWWY_```  ^z0022 ^y//fll7>S>STTV\]]]4444F44444r   c                    t           j        j        }| rC| dk    rt           j        j        }n+| dk    rt           j        j        }nt          d|  d          |S )Nz.ca.cgCache modifier  not supported)r
   CACHE_MODIFIERr   CACGr)   cache_modifiercaches     r   _str_to_load_cache_modifierr  Q  se    "E OU""%(EEu$$%(EEM~MMMNNNLr   c                   t           j        j        }| rs| dk    rt           j        j        }n[| dk    rt           j        j        }nC| dk    rt           j        j        }n+| dk    rt           j        j        }nt          d|  d          |S )Nz.wbr  z.csz.wtr  r  )r
   r  r   WBr  CSWTr)   r  s     r   _str_to_store_cache_modifierr  ]  s    "E 
OU""%(EEu$$%(EEu$$%(EEu$$%(EEM~MMMNNNLr   c                    t           j        j        }| rC| dk    rt           j        j        }n+| dk    rt           j        j        }nt          d|  d          |S )N
evict_lastevict_firstzEviction policy r  )r
   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr)   )eviction_policyevictions     r   _str_to_eviction_policyr  m  se    !(H Ql**)4HH--)5HHOOOOPPPOr   c                    d }| rC| dk    rt           j        j        }n+| dk    rt           j        j        }nt	          d|  d          |S )NzeronanzPadding option r  )r
   PADDING_OPTIONPAD_ZEROPAD_NANr)   )padding_optionpaddings     r   _str_to_padding_optionr  y  s_    G OV##'0GGu$$'/GGM~MMMNNNNr   c                   t           j        j        }| rs| dk    rt           j        j        }n[| dk    rt           j        j        }nC| dk    rt           j        j        }n+| dk    rt           j        j        }nt          d|  d          |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r
   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr)   )
sem_optionsems     r   _str_to_semr    s    
/
)C 
L""/)CC9$$/)CC9$$/1CC9$$/)CCJ
JJJKKKJr   c                    t           j        j        }| r[| dk    rt           j        j        }nC| dk    rt           j        j        }n+| dk    rt           j        j        }nt          d|  d          |S )Ngpuctasysr  r  )r
   MEM_SYNC_SCOPEGPUCTASYSTEMr)   )scope_optionscopes     r   _str_to_scoper    s}    !E N5  %)EEU""%)EEU""%,EELLLLMMMLr   c                n   | rt          | d          s| g} d | D             } | D ]5}t          |t                    rd|cxk    rt          |          k     sn J 6t          |           dk    sJ t          |           t          t	          |                     k    s
J d            t          |           S dS )N__iter__c                T    g | ]%}t          |t          j                  r|j        n|&S r   r	  r*   r4  r  r&  elems     r   r'  z0_canonicalize_boundary_check.<locals>.<listcomp>  s0    lllUY
4(F(FP$**Dlllr   r   z'Duplicate dimension in `boundary_check`r   )hasattrr	  r"   r  setrF  )boundary_checkblock_shapedims      r   _canonicalize_boundary_checkr    s     &~z22 	.,-Nll]klll! 	H 	HCc3''GA,G,G,G,Gs;7G7G,G,G,G,G,G,G,G>""Q&&&&>""c#n*=*=&>&>>>>@i>>>n%%%2r   c	           
        ||t          d          | j        j        j        }	|	t          j        k    s
J d            |	                                r$|t          j        j        k    rt          d          | j        j        }
t          ||

                                          }t          j        |                    | j        |||||          |
          S )NK`mask` and `other` arguments cannot be specified for loading block pointers3`tl.int1` should be rewrited in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r)   r]   
element_tyr*   r   rO   r
   r  r  r  rM  r+   create_tensor_pointer_loadrk   )ptrmaskrh   r  r  r  r  is_volatiler#   elt_tyrh  s              r   _load_block_pointerr    s     5,fgggX +FRWS}} ]7b&7&???[\\\ X F 2.&BYBYB[B[\\N 9**3:~wPUW_almmouw w wr   c	           
        | j         j                                        s*t          d| j                                          d          ||t          d          |s|rt          d          | j                                         sT|r(|j                                         rt          d          |r(|j                                         rt          d          | j                                         rT|(t          || j                                         |          }|(t          || j                                         |          }| j         j        }	|	j        }
|
t          j
        k    r7t          j        }
t          j        |
|	j                  }	t          | |	|          } |t          ||
|          }| j                                         r/| j                                         }t          j        |
|          }n|
}|0t          j        |                    | j        |||          |          S t          j        |                    | j        |j        |r|j        nd |||          |          S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)r]   r^   rU   r)   r   r   rS  rM  r  r*   r   int8pointer_typeaddress_spacer_   r   r+   create_loadrk   create_masked_load)r  r  rh   r  r  r  r  r  r#   ptr_tyr  r   rh  s                r   _load_legacyr    s   8?!!## US1B1B1D1DSSSTTT |)DEEE U. U T U U 	U
 8 g 	fDI&&(( 	fdeee 	gUZ((** 	gefff x V'ch.G.G.I.I7SSD(0I0I0K0KWUUE X_FF )=>>3(( UFG,, x ))++vu--  |y,,SZ+VVX^___y&&sz4;PU@_[_afhp'24 45;= = 	=r   r  r  Optional[tl.tensor]r  r   r  rV  r  r  r  c	                &   t          |          }	t          |          }
t          |          }| j                                        r5| j        j                                        rt          | |||||	|
||	  	        S t          | |||||	|
||	  	        S rT   )	r  r  r  r]   rU   r  r   r  r  )r  r  rh   r  r  r  r  r  r#   r  r  r  s               r   loadr     s     (77E&77H$^44G
x nSX099;; n"3e^WeU]_jlsttt CunguhXcelmmmr   desc_ptrc           	         t          ||d          }|                    | j        ||                    |          t	          |          t          |                    }t          j        ||          S NFrequire_i64)_convert_to_ir_valuescreate_descriptor_loadrk   r   r  r  r*   r+   )r  offsetsr  r  r]   r#   r   s          r   descriptor_loadr    sk    #GW%HHHG&&xGATAT'B>'R'R'>'O'O	Q 	QA 9Qr   c                    t          ||d          }t          j        |                    | j        |j        |          t          j                  S r  )r  r*   r+   create_descriptor_storerk   void)r  r  r  r#   s       r   descriptor_storer    sC    #GW%HHHG9W44X_elT[\\^`^efffr   c           	     .   |t          d          | j        j                                        }|j                                        st          |||          }|j                                        s
J d            ||j                                        k    s(J d| d|j                                         d            | j        j        j        |j        j        k    s*J d| j        j        j         d|j        j         d            | j        j        j        }|t          j        k    s
J d            t          ||          }t          |||          }t          j
        |                    | j        |j        |||          t          j                  S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r)   r]   r  rM  r   rS  r*   r   r  r_   r+   create_tensor_pointer_storerk   r  )	r  valr  r  r  r  r#   r  r  s	            r   _store_block_pointerr    s    fggg (%6688K8 >"3W==8OO OOOO#(33     \k\\SX5N5N5P5P\\\  8)SX-@@@@  CqX[X`XkXv  Cq  Cq  RU  RZ  Re  Cq  Cq  Cq@@@X +FRWS 2.+NNN sFG
$
$C 9W88SZQ_afhpqqW  r   c           	        | j         j                                        s*t          d| j                                          d          |rt          d          | j                                         sR|j                                         rt          d          |r(|j                                         rt          d          | j                                         rRt          || j                                         |          }|(t          || j                                         |          }| j         j        }|j        }|t          j
        k    r7t          j        }t          j        ||j                  }t          | ||          } t          |||          }|s?t          j        |                    | j        |j        ||          t          j                  S |j         j                                        st          d          t          j        |                    | j        |j        |j        ||          t          j                  S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)r]   r^   rU   r)   r   r   rS  rM  r  r*   r   r  r  r  r_   r+   create_storerk   r  r  create_masked_store)	r  r  r  r  r  r  r#   r  r  s	            r   _store_legacyr  ;  s'   8?!!## VT1B1B1D1DTTTUUU  B A B B 	B
 8 f8 	gefff 	fDI&&(( 	fdeee x T"3(A(A(C(CWMM'ch.G.G.I.I7SSDX_FF )=>>3(( sFG
$
$C  ay--cj#*eXVVXZX_```9##%% ?=>>>9W00SZV[]effhjhopppr   r  c           	        t          |          }t          |          }| j                                        s| j        j                                        rt          d          | j                                        r3| j        j                                        rt          | ||||||          S t          | ||||||          S )N"Cannot store to a constant pointer)r  r  r]   is_constr^   r)   rU   r  r   r  r  )	r  r  r  r  r  r  r#   r  r  s	            r   storer  g  s     )88E&77H
x ?cho6688 ?=>>>
x WSX099;; W#CdNE8U\]]] S#t^UHgVVVr   cmpr  r  c           	        t          |          }t          |          }| j        j        j        }|j        dvrt          d          t          j        |	                    | j
        |j
        |j
        ||          |j                  S )N)   r  rt  z9atomic_cas only supports elements with width {16, 32, 64})r  r  r]   r^   r  rj  r)   r*   r+   create_atomic_casrk   )r  r  r  r  r  r#   r  s          r   
atomic_casr  }  sy    
c

C%  E+J$L88TUUU9W..sz3:szSVX]^^`c`hiiir   op&Tuple[tl.tensor, tl.tensor, tl.tensor]c                   | j         j                                        s)t          d| j                                         z             | j                                         s| j         j                                        rt          d          | j         j        j        }|t          j        u r|dk    rt          d|z   dz             |t          j	        t          j
        t          j        t          j        fv r%t          d|z   dz   t          |          z             | j                                         rT|(t          || j                                         |          }|(t          || j                                         |          }t#          || j         j        j        |          }|s|                    d          }t          j	        }| j                                         rc|                    || j                                                   }t          j        t          j	        | j                                                   }t          j        ||          }| ||fS )Nz)Pointer argument of store instruction is r  rq   atomic_z does not support fp16z does not support T)r]   r^   rU   r)   r   r  r  r*   rL   r   r  int16rN   rV  r   rS  rM  r_   get_int1r  r   r+   )r  r  r  r  r#   r  mask_irmask_tys           r   atom_red_typechecking_implr    s   8?!!## \DsxGXGXGZGZZ[[[
x ?ch1::<< ?=>>>+JRZB%KKR*BBCCCbgrw"+>>>R*>>ZPQQQ
x R'ch.G.G.I.I7SSD?&sCH,E,E,G,GQQC
sCHO.
8
8C +""4(('8 	J**7CH4M4M4O4OPPGmBGSX-F-F-H-HIIGy'**T>r   c                
   t          | ||d|          \  } }}t          |          }t          |          }|j        j        }|                                r|                                rPt          j        |	                    t          j        j        | j        |j        |j        ||          |j                  S t          j        |	                    t          j        j        | j        |j        |j        ||          |j                  S |t          j        t          j        hvrt#          d|           t%          g d||          }|t          j        k    rt          j        nt          j        }t+          |||          }	t+          | t          j        |d          |          }
|t          j        k    rt          j        nt          j        }t+          |||          }t+          | t          j        |d          |          }t3          |||          }t5          |||          }t          j        |	                    t          j        j        |
j        |	j        t7          |||          j        ||          |	j                  }t          j        |	                    t          j        j        |j        |j        t7          |||          j        ||          |j                  }t;          ||||          }t+          |||          S )Nr   z#atomic_max not supported for dtype         r   )r  r  r  r]   r^   rO   r   r*   r+   create_atomic_rmwr
   	ATOMIC_OPMAXrk   UMAXrJ   rH   r<   r  r-   r  r   r  uint32uint64r   r   r   UMINwherer  r  r  r  r  r#   sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   s                      r   
atomic_maxr,       /S$wOONCd
c

C%  EX_F}} y!! 	y9))",*:CJ
TXT_adfkllnqnvx x x 9))",*;SZUYU`beglmmorowy y y
 bj"*---FfFFGGGC))D2:--RXX28FC))EC33W==E!RZ//biiRYGS'7++FS"/'155w??F
T7
+
+C
Cw
'
'Ci!!","2EL%,"&tS'":":"A3	O 	OPUPZ\ \G i!!","3V]FM"&tS'":":"A3	O 	OPVP[] ]G Wgw
/
/C3(((r   c                
   t          | ||d|          \  } }}t          |          }t          |          }|j        j        }|                                r|                                rPt          j        |	                    t          j        j        | j        |j        |j        ||          |j                  S t          j        |	                    t          j        j        | j        |j        |j        ||          |j                  S |t          j        t          j        hvrt#          d|           t%          g d||          }|t          j        k    rt          j        nt          j        }t+          |||          }	t+          | t          j        |d          |          }
|t          j        k    rt          j        nt          j        }t+          |||          }t+          | t          j        |d          |          }t3          |||          }t5          |||          }t          j        |	                    t          j        j        |
j        |	j        t7          |||          j        ||          |	j                  }t          j        |	                    t          j        j        |j        |j        t7          |||          j        ||          |j                  }t;          ||||          }t+          |||          S )Nr   z#atomic_min not supported for dtype r  r   )r  r  r  r]   r^   rO   r   r*   r+   r  r
   r  MINrk   r  rJ   rH   r<   r  r-   r  r   r  r  r  r   r   r   r  r  r   s                      r   
atomic_minr0    r-  r   c           
     r   t          | ||d|          \  } }}t          |          }t          |          }|j        j        }|                                rt          j        j        nt          j        j	        }t          j        |                    || j        |j        |j        ||          |j                  S )Nrq   )r  r  r  r]   r^   rV   r
   r  FADDADDr*   r+   r  rk   )r  r  r  r  r  r#   r!  r  s           r   
atomic_addr4    s    /S$wOONCd
c

C%  EX_F$0022	H		8HB9W..r3:sz4;X[]bccehemnnnr   c           
        t          | ||d|          \  } }}t          |          }t          |          }t          j        |                    t          j        j        | j	        |j	        |j	        ||          |j
                  S )Nand)r  r  r  r*   r+   r  r
   r  ANDrk   r]   r  r  r  r  r  r#   s         r   
atomic_andr9    y    /S$wOONCd
c

C%  E9W..r|/?SZY]YdfikpqqX  r   c           
        t          | ||d|          \  } }}t          |          }t          |          }t          j        |                    t          j        j        | j	        |j	        |j	        ||          |j
                  S )Nor)r  r  r  r*   r+   r  r
   r  ORrk   r]   r8  s         r   	atomic_orr>     sx    /S$gNNNCd
c

C%  E9W..r|
CJX\XcehjoppX  r   c           
        t          | ||d|          \  } }}t          |          }t          |          }t          j        |                    t          j        j        | j	        |j	        |j	        ||          |j
                  S )Nxor)r  r  r  r*   r+   r  r
   r  XORrk   r]   r8  s         r   
atomic_xorrB    r:  r   c           
        t          | ||d|          \  } }}t          |          }t          |          }t          j        |                    t          j        j        | j	        |j	        |j	        ||          |j
                  S )Nxchg)r  r  r  r*   r+   r  r
   r  XCHGrk   r]   r8  s         r   atomic_xchgrF    sy    /S$PPNCd
c

C%  E9W..r|/@#*cjZ^ZegjlqrrX  r   c                    |                                  |j        j        v sJ d|j        j         d|              |                                 } | dk    rd} t	          t
          j        |           S )Nzinput_precision must be one of z. Got TF32X3TF32x3)lowerrv  allowed_dot_input_precisionsupperr  r
   INPUT_PRECISION)input_precisionr#   s     r   _str_to_dot_input_precisionrO    sz      ""go&RRRRo'/*Voo^moo SRR%++--O("""2%777r   accrN  max_num_imprecise_acc	out_dtypec           
        d }| j                                         r|j                                         sJ  || j        |j        |j                   | j                                        s|j                                        r6t          | t          j        |          } t          |t          j        |          }||j        j        }t          ||          }t          | j                  }t          |j                  }	||	cxk    rdk    s,n ||	cxk    rdk    sn J d| j         d|j         d            | j        d         j        |j        d         j        k    sAJ d	| j         d
|j         d| j        d         j         d|j        d         j         d	            | j        d         j        dk    r,| j        d         j        dk    r|j        d         j        dk    sJ d| j         d|j         d            | j         j                                        rf| j         j        t          j        k    s
J d            | j        d         j        dk    s
J d            |                    d          }
t          j        }n|                                rt'          d          | j         j                                        s| j         j                                        r"|                    d          }
t          j        }n@|                                r|                    d          n|                    d          }
|}| j         j        d         }|j         j        d         }|dk    r| j         j        d         nd }t          j        ||r|||gn||g          }| |                    |
|r|||gn||g          }n|j        }|j         |k    sJ |A| j                                        r&|j                                        r|j        j        }nd}t          j        |                    | j        |j        |||          |          S )Nc                @   |j         su|                                 s|                                r
J d            |                                 r|                                rd S | |k    sJ d|  d| d            d S |                                 s|                                rQ| |k    sJ d|  d| d            |                                 s"|                                 sJ d|  d            d S d S |                                 s|                                r/|j        rg d	}nd
dg}d } || |d            |||d           d S |                                 sI|                                 s5| 	                                s!| 
                                sJ d|              |                                sI|                                s5|	                                s!|
                                sJ d|             | |k    sJ d|  d| d            d S )Nz1Dot op does not support fp8e4nv on CUDA arch < 90zFirst input (z) and second input (z) must have the same dtype!z0Both operands must be same type. First operand (z) and second operand (r   z:Both operands must be either int8 or uint8. Operand type ()fp8e4nvfp8e5fp8e4b15rU  rV  c           	          t           fd|D                       s.d                    |          }t          d| d| d  d          d S )Nc              3  N   K   | ]} t          d |                       V   dS )is_N)r  )r&  
dtype_namer9   s     r   rD  zLdot.<locals>.assert_dtypes_valid.<locals>._validate_dtype.<locals>.<genexpr>=  s?      dd
Awu.@J.@.@AACCddddddr   rK  zOnly supports z. z (r   )anyr9  AssertionError)r9   allowed_typesoperand_namesupported_typess   `   r   _validate_dtypez9dot.<locals>.assert_dtypes_valid.<locals>._validate_dtype<  ss    ddddVcddddd k*.))M*B*B,-io-i-iQ]-i-iaf-i-i-ijjjk kr   zFirst operandzSecond operandzUnsupported dtype )rw  ru  r{  rO   is_int8is_uint8allow_fp8e4b15rK   rM   rI   r   )	lhs_dtype	rhs_dtyperv  r^  ra  s        r   assert_dtypes_validz dot.<locals>.assert_dtypes_valid*  s\   $ 	E ++-- Ci6J6J 7 7 C CBC C C!! i&6&6&8&8 	)))  ,A9  ,A  ,AZc  ,A  ,A  ,A)))))!! EY%5%5%7%7 E I---  0Pbk  0P  0P  DM  0P  0P  0P--- ((** ]i.@.@ / / ] ]\PY\\\] ] ] ] ] ] ]!!## Ey'7'7'9'9 E) 9$D$D$DMM%.$8Mk k k
  	=/JJJ	=:JKKKKK ((** 4i.?.?.A.A 4YEVEVEXEX 4\e\m\m ] ] 4 43	334 4 4 ((** 4i.?.?.A.A 4YEVEVEXEX 4\e\m\m ] ] 4 43	334 4 4 I---  0Ey  0E  0E^g  0E  0E  0E-----r   r	      z+Both inputs must be either 2D or 3D; (lhs: z	 vs rhs: r   r2  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (r
  z0All non-batch values in both first input shape (z) and second input shape (z) must be >= 16!zonly int8 supported!r   r  zsmall blocks not supported!r   zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`) r]   r   r9   rv  rx  r_   r*   rL   default_dot_input_precisionrO  r  r   r  r^   rO   r  	get_int32r-   rM   r)   rI   get_fp32rJ   rK   get_fp16r   r  rk   r{  max_num_imprecise_acc_defaultr+   
create_dot)rX   rY   rP  rN  rQ  rR  r#   rg  lhs_rankrhs_rankr   ret_scalar_tyMNBr   
acc_handles                    r   dotrw  '  s   E E E@ 8638#4#4#6#6666	39go>>>
y -#)"7"7"9"9 -3
G,,3
G,,!/E1/7KKO39~~H39~~Hx$$$$1$$$$H(A(A(A(A(A(A(A(A(A  DRqtqz  DR  DR  FI  FO  DR  DR  DR(A(A(A9R=#)
#   q  q  qSY  q  q  VY  V_  `b  Vc  Vi  q  q  [^  [d  eg  [h  [n  q  q  q  9R="$$2)<)B)BIbM2%%%{39{{`c`i{{{ &%%
x "x"')))+A)))y|!R''')F'''q!!					 "vx x 	x		 	 	"	" "cho&=&=&?&? "a  
$-$5$5$7$7PWa   W=M=Ma=P=P!rArA%]]qA]=q*D1a))q!fEEF
{))"1.Hq!Qii1a&II

Z
x6!!!! $9 	&#)"2"2"4"4 	&$+O$Q!!$%!9W''
CJ
O]rss  r   	conditionc                   t          | t          j        |          } | j                                        r<t          | ||          \  } }t          |||          \  }}t          | ||          \  } }t          |||dd          \  }}| j                                        st          | ||          \  } }|j        }t          j        |                    | j	        |j	        |j	                  |          S )NT)
r_   r*   r   r]   r   r\   rf   r+   create_selectrk   )rx  r   r   r#   r[  r   s         r   r  r    s    Y11I~   C+Iq'BB	1#Aq'221+Iq'BB	1'1gtTBBDAq>""$$ C+Iq'BB	1VF9W**9+;QXqxPPRXYYYr   c                `    |rt          j        ||          }n|}t          j        | |          S rT   )r*   r   r+   )r   rw   r\  res_tys       r   wrap_tensorr}    s6     y)44 9Qr   inputsSequence[tl.tensor]Tuple[tl.tensor, ...]c                "    t          fd D                        d d         j        j        t                    }|k     sJ d| d            fdt	                    D             t          fd D                       s
J d                                d  D                        |                                            t           fd	t          t                               D                       S )
Nc              3  T   K   | ]"}t          ||j        j        gd           V  #dS )Tr3  N)r!  r  r  )r&  tr#   s     r   rD  zreduction.<locals>.<genexpr>  s;      ffZ[wq17=/tWUUUffffffr   r   z&reduction axis must be < inputs rank (r   c                &    g | ]\  }}|k    |S r   r   )r&  rQ  r   r!   s      r   r'  zreduction.<locals>.<listcomp>  s"    ===tq!199999r   c              3  8   K   | ]}|j         j        k    V  d S rT   )r]   r   )r&  r  r   s     r   rD  zreduction.<locals>.<genexpr>  s,      55qv|u$555555r   z-all reduction inputs must have the same shapec                    g | ]	}|j         
S r   rk   r&  r  s     r   r'  zreduction.<locals>.<listcomp>  s    &@&@&@Aqx&@&@&@r   c              3     K   | ]8}t                              |          |         j        j                  V  9d S rT   r}  
get_resultr]   r^   )r&  rQ  r~  	reduce_opr\  s     r   rD  zreduction.<locals>.<genexpr>  sG      tt\]Y11!44fQin6KYWWttttttr   )	tupler]   r   r  rN  allcreate_reduceverifyr  )r~  r!   region_builder_fnr#   rankr  r\  r   s   `` ` @@@r   	reductionr    sC   |ffff_efffff1IN Eu::D$;;;HHHH;;;====y//===I5555f55555ff7ffff%%&@&@&@&@&@$GGIi   ttttttafgjkqgrgrasasttttttr   reversec                     d         j         j        t                    }| |cxk    r|k     sn J d| d| d            |dk     r||z  } D ]}|j         j        k    s
J d            |                    d  D             ||           |                                            t           fdt          t                               D                       S )Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec                    g | ]	}|j         
S r   r  r  s     r   r'  z$associative_scan.<locals>.<listcomp>  s    "<"<"<18"<"<"<r   c              3     K   | ]8}t                              |          |         j        j                  V  9d S rT   r  )r&  rQ  r~  scan_opr   s     r   rD  z#associative_scan.<locals>.<genexpr>  sG      nnVWW//22F1IN4I5QQnnnnnnr   )r]   r   r  create_scanr  r  r  )	r~  r!   r  r  r#   r  r  r  r   s	   `      @@r   associative_scanr    s(   1IN Eu::D5D4!Sd!S!SD!S!S!Saxx Q Qv|u$$$&P$$$$!!"<"<V"<"<"<dGLLGgNNnnnnnn[`adekalal[m[mnnnnnnr   num_binsc                $   t          | j                  dk    s
J d            | j                                        s
J d            t	          j        |                    | j        |          t	          j        t          j	        |f                    S )Nr   z histogram only supports 1D inputz%histogram only supports integer input)
r  r   r9   rO   r*   r+   create_histogramrk   r   r-   )rg   r  r#   s      r   	histogramr    s    u{q   "D   ;HH!HHHH9W--elHEEr}UWU]`h_kGlGlmmmr   valuesc                   t          dt          | j                            t          |          k    rt          d          | j                            dt          j        || j                                                             | S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r  r   r)   rk   set_attrr
   	make_attrget_contextr   r  s     r   multiple_ofr    sl    
1c!'lls6{{**\]]]H'fah>R>R>T>T)U)UVVVHr   c                    t          | j                  t          |          k    rt          d          | j                            dt          j        || j                                                             | S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr  r   r)   rk   r  r
   r  r  r  s     r   max_contiguousr    sa    
17||s6{{""_```Hor|FAH<P<P<R<R'S'STTTHr   c                    t          | j                  t          |          k    rt          d          | j                            dt          j        || j                                                             | S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s     r   max_constancyr    sa    
17||s6{{""^___Hnbl618;O;O;Q;Q&R&RSSSHr   c                d    t          j        |                                 t           j                  S rT   )r*   r+   create_barrierr  )r#   s    r   debug_barrierr    s"    9W++--rw777r   prefixargsList[tl.tensor]hexc                \   |                      d          s|r| dz  } |                      d          s|r| d d         dz   } t          |           dk    r|                     d          sd| z   } d |D             }t          j        |                    | ||          t          j                  S )N rL  r2  r	   c                    g | ]	}|j         
S r   r  )r&  args     r   r'  z device_print.<locals>.<listcomp>  s    +++s
+++r   )endswithr  
startswithr*   r+   create_printr  )r  r  r  r#   new_argss        r   device_printr    s     ??3 D #??4   $T $t#
6{{Qv0055v++d+++H9W))&#x@@"'JJJr   condmsg	file_namelinenoc           	     >   | j         }|                                sHt          j        |j        d          }t          j        |                    | j        d          |          } t          j        |                    | j        ||||          t          j	                  S )N)r   )
r]   r   r*   r   r^   r+   r  rk   create_assertr  )r  r  r  	func_namer  r#   cond_tys          r   device_assertr    s    iG L-66y--dk5AA7KK9W**4;Y	SYZZ\^\cdddr   c                z   t          |t                    rt          j        |          }t          |t          j                  r|rAd|j        cxk    rdk     sn J d|j         d            |                     |j                  S d|j        cxk    rdk     sn J d|j         d            |                     |j                  S t          |t          j                  r|j        j        dk    s
J d	            |j	        
                                s
J d
            |j	        t          j        k    rG|rE|                     |j        |                                 |j	                                                  S |j	        t          j        k    r|s
J d            |j        S J dt#          |                       )Nl         l            z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the range           zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )r	  r"   r*   r4  r  r  rk  r+   r  r9   rO   r  r  rk   get_int64_tyr   r-   r]   )r#   r  r  s      r   _convert_elem_to_ir_valuer    s   $ "|D!!$%%  	1TZ////%///// 2F#z2F 2F 2F///$$TZ000TZ////%///// 2F#z2F 2F 2F///$$TZ000	D")	$	$ z1$$$&R$$$z  ""^^$^^^^:!!k!**4;8L8L8N8NPTPZPhPhPjPjkkkZ28##K#S S S S S{TTT

TTTTTr   c                h     t          |d          r fd|D             S t           |          gS )Nr  c                2    g | ]}t          |          S r   )r  )r&  r  r#   r  s     r   r'  z)_convert_to_ir_values.<locals>.<listcomp>%  s&    \\\$)'4EE\\\r   )r  r  )r#   	list_liker  s   ` `r   r  r  #  sJ    y*%% ]\\\\\R[\\\\%gy+FFGGr   basec           	        t          ||          }t          ||          }t          ||d          }| j                                        r| j        j                                        rt          d          | j        j        t          j        k    r8t          | t          j	        t          j
        | j        j                  |          } t          d          sgd D             t          d D                       s
J d            t          |d          s|g}d |D             }t          |          t          t!          t#          |                              k    s
J d	            t          fd
||||fD                       s
J d            |                    | j        ||||          }t          j        |t          j	        t          j        | j        j                                      S )NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                T    g | ]%}t          |t          j                  r|j        n|&S r   r  r  s     r   r'  z"make_block_ptr.<locals>.<listcomp>;  s/    bbbdD",!?!?I4::Tbbbr   c              3  `   K   | ])}t          |t                    od |cxk    odk     nc V  *dS )r  r  N)r	  r"   r  s     r   rD  z!make_block_ptr.<locals>.<genexpr><  sM      XXDz$$$?4)?)?)?)?%)?)?)?)?XXXXXXr   zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                T    g | ]%}t          |t          j                  r|j        n|&S r   r  r  s     r   r'  z"make_block_ptr.<locals>.<listcomp>B  s/    VVV:dBL99CTZZtVVVr   z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  X   K   | ]$}t                    t          |          k    V  %d S rT   )r  )r&  r  r  s     r   rD  z!make_block_ptr.<locals>.<genexpr>F  s6      ddis;3y>>1ddddddr   zBExpected shape/strides/offsets/block_shape to have the same length)r  r]   rU   r  r   r)   r*   r   r_   r  r  r  r  r  rF  rG  r  r  create_make_block_ptrrk   r+   r   )r  r   stridesr  r  orderr#   rk   s       `   r   make_block_ptrr  )  s    "'511E#GW55G#GW%HHHG 9 j!5!>!>!@!@ jhiii yrw&&D"/"'493JKKWUU ;
++ $"mbbVabbbKXXKXXXXX R RQR R R 5*%% VVPUVVVE%==Ds5zz!2!2333335s333 ddddE7T[]bCcddddd M MLM M M **4;wQ\^cddF9VR_R]49;OQ\-]-]^^___r   c                    t          ||d          }t          j        |                    | j        |          | j                  S r  )r  r*   r+   create_advancerk   r]   )r  r  r#   s      r   advancer  P  s>    #GW%HHHG 9W++DKAA49MMMr   )r!   r"   r#   r$   r%   r&   )r3   r4   r5   r4   r%   r4   )r3   r4   r5   r4   rB   rC   r%   r4   )r   r4   r   r4   rQ   rC   r%   rR   )FFTF)rX   r&   rY   r&   r#   r$   r%   rZ   )rg   r&   rh   r&   r#   r$   r%   r&   )
rg   r&   rh   r&   r   rC   r#   r$   r%   r&   )r   r&   r   r&   r   r   r#   r$   )
r   r&   r   r&   r   r&   r   r   r#   r$   )rg   r&   rh   r&   r#   r$   r%   rZ   )rg   r&   r#   r$   )rg   r&   r%   r&   )rg   r&   r#   r$   r%   r&   )rg   r&   r#   r&   r%   r&   )r   r&   r%   r   )r  r"   r  r"   r#   r$   r%   r&   )r   r  r9   r4   r#   r$   r%   r&   )r  r&   r   r  r#   r$   r%   r&   )
rg   r&   r  r  r  rC   r#   r$   r%   r&   )rg   r&   r!   r"   r#   r$   r%   r&   )
rX   r&   rY   r&   r  rC   r#   r$   r%   r&   )r/  r&   r0  r&   r#   r$   r%   r&   )r/  r&   r#   r$   r%   rZ   )rg   r&   r?  r@  r#   r$   r%   r&   )rg   r&   r   r  r#   r$   r%   r&   )rX   r&   rY   r&   r#   r$   r%   r&   )r_  r`  )rg   r&   rh  r4   r#   r$   r%   r&   rT   )
rg   r&   rh  r4   r#   r$   rq  r`  r%   r&   )r  r&   r  r  rh   r  r  r   r  rV  r  rV  r  rV  r  rC   r#   r$   r%   r&   )
r  r&   r  rV  r  rV  r#   r$   r%   r&   )r  r&   r  r&   r#   r$   r%   r&   )r  r&   r  r&   r  r  r  rV  r  rV  r#   r$   r%   r&   )r  r&   r  r&   r  r&   r  rV  r  rV  r#   r$   r%   r&   )r  r&   r  r&   r  r&   r  rV  r#   r$   r%   r  )r  r&   r  r&   r  r&   r  rV  r  rV  r#   r$   r%   r&   )rX   r&   rY   r&   rP  r&   rN  r`  rQ  r"   rR  r4   r#   r$   r%   r&   )
rx  r&   r   r&   r   r&   r#   r$   r%   r&   )r~  r  r!   r"   r#   r$   r%   r  )
r~  r  r!   r"   r  rC   r#   r$   r%   r  )rg   r&   r  r"   r#   r$   r%   r&   )r   r&   r  r  r%   r&   )r#   r$   r%   r&   )
r  rV  r  r  r  rC   r#   r$   r%   r&   )r  r&   r  rV  r  rV  r  r"   r#   r$   r%   r&   )T)r  r&   r#   r$   r%   r&   )l
__future__r   typingr   r   r   r   r   _C.libtritonr
    r   r*   r   r   	Exceptionr   r/   r2   rA   rP   rW   rf   rq   rx   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   r  r  r  r  r!  r*  r.  r9  r>  rI  rS  r\   rg  r   r_   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r,  r0  r4  r9  r>  rB  rF  rO  rw  r  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   <module>r     sW   " " " " " " ; ; ; ; ; ; ; ; ; ; ; ; ; ;                  GCLLF F F F F	 F F FD D D DF F F F@ @ @ @  ,  ,  ,  ,P	< 	< 	< 	< ejGK,1    ": : : :04 4 4 4	4 	4 	4 	4R R R R4: : : :& & & &4 4 4 465 5 5 5$5 5 5 5$	] 	] 	] 	]"   Q Q Q Q
P P P P
Q Q Q Q
' ' ' '& & & &" " " "R R R R
R R R R
Q Q Q Q   # # # #$ $ $ $) ) ) )4 4 4 44 4 4 44 4 4 44 4 4 4	4 	4 	4 	4	4 	4 	4 	4"D D D D"( ( ( (.H H H H[ [ [ [M M M MK K K K   2
 
 
 
I I I IL L L L$2 2 2 2tp p p pZ Z Z Z( 04o5 o5 o5 o5 o5n	 	 	   	 	 		 	 	     
 
 
w w w,7= 7= 7=tn n n n    g g g g
  :)q )q )qXW W W W,j j j j   6$) $) $) $)N$) $) $) $)No o o o            8 8 8X X X X@Z Z Z Z&     u u u u.o o o o2n n n n         8 8 8 8K K K Ke e e eU U U0H H H H$` $` $` $`NN N N N N Nr   