
    çg{                     f   d dl Z ddlmZmZ ddlmZ ddlmZ d Z edd i           ed	d
 i          ed	ej        fd                                    Z	 edd i           ed	d i          ed	ej        fd                                    Z
 G d de j        j                  Zej        ZdS )    N   )
heuristicsjit)languagenext_power_of_2c                 &    | dk     rdS | dk     rdS dS )Ni      i           )Ns    T/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/ops/cross_entropy.py	num_warpsr      s#    4xxq	
Tq2    r   c                 ,    t          | d                   S Nr   r   nargss    r   <lambda>r          	%*(=(= r   BLOCKc                 ,    t          | d                   S r   r   r   s    r   r   r          OE#J$?$? r   c                    t          j        d          }t          j        d|          }t          j        ||z             }| ||z  z   |z   } |||z  z   |z   }	|||z  z   |z   }
t          j        | ||k     t	          d                     }|                    t           j                  }|t          j        |d          z
  }t          j        t          j	        t          j
        |          d                    |z
  }t          j        |	|||k                t          j                     t          j        |
          }t          j        ||z   |           d S Nr   inf)maskother)r   )tl
program_idarangeloadfloattofloat32maxlogsumexpstoredebug_barrier)LOGITSPROBSIDXLOSSr   r   rowcolsidx
WRIT_PROBS
READ_PROBSlogitsprobss                r   _forwardr9      s8    -

C9QD
'#)

CcAg$Fq4'Jq3&JWV$(5<<-@@@FYYrz""FbfVQ'''FF26"&..!,,--6EHZTAX.... GJEHTCZr   c                 ,    t          | d                   S r   r   r   s    r   r   r   )   r   r   c                 ,    t          | d                   S r   r   r   s    r   r   r   *   r   r   c                 
   t          j        d          }t          j        d|          }t          j        ||z             }| ||z  z   |z   } t          j        | ||k     t	          d                     }t          j        |                    t           j                            }||k    }	t          j        ||z             }
||	z
  |
z  }t          j        | |                    | j	        j
                  ||k                d S r   )r!   r"   r#   r$   r%   r+   r&   r'   r,   dtype
element_ty)r/   r0   DPROBSr   r   r2   r3   r4   r8   deltadoutdins               r   	_backwardrC   )   s     -

C9QD
'#)

CC!GOd"E WUu>>>>EF588BJ''((ECKE76C<  D5=D
 CHUCFF5;122BBBBBBr   c                   :    e Zd Zed             Zed             ZdS )_cross_entropyc                 D  	 |j         t          j        k    s
J d            j        j         }}j        d         	t          j        |||          }t          j        ||          }	fd}t          |         |||	           |                    ||           |S )Nz(Indices are expected to be of type long.)r=   devicec                 4                                     z  fS Nnumel)optr7   n_colss    r   r   z(_cross_entropy.forward.<locals>.<lambda>I   s    FLLNNf47 r   )r=   torchint64rH   shape
empty_liker9   save_for_backward)
clsctxr7   indicesrH   r=   resultneg_logprobsgridrN   s
     `      @r   forwardz_cross_entropy.forward?   s     ,,,/Y,,,v|b!!'vFFF'eFKKK77777v|WffEEElG444r   c                 x    |j         \  }j        d         fd}t          |         ||           dfS )a  We know d(-log(p[i])/dlogit[k] = -id_mat[i,k] + p[k]
        so we initialize the gradient as neg_logprobs, so we can just exponentiate
        to get p[k], which is most of what we need...  neg_logprobs will be
        modified in place to become the gradient we want
        rG   c                 4                                     z  fS rJ   rK   )rM   rN   rX   s    r   r   z)_cross_entropy.backward.<locals>.<lambda>[   s    L..00F:= r   N)saved_tensorsrQ   rC   )rT   rU   dneg_logprobsrV   rY   rN   rX   s        @@r   backwardz_cross_entropy.backwardO   sV     !$ 1g #B'=====$g}fEEET!!r   N)__name__
__module____qualname__classmethodrZ   r_   r   r   r   rE   rE   =   sH          [ " " [" " "r   rE   )rO    r   r   r   r!   r   r   	constexprr9   rC   autogradFunctionrE   applycross_entropyr   r   r   <module>rj      sj                          [==>??W??@AA         BA @? , [==>??W??@AACBL C C C  BA @?C" "  "  "  "  "U^,  "  "  "F $r   