
    çgF                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ ej                            ej                            e                    Zej                            ed          gZd Z e j                    d	             Zd
 Z G d de          Zd Zd Z G d de          Z G d de          ZdS )    N)Path)_build)get_cache_manager)	GPUTarget)	GPUDriverincludec                 (    dd l }|                                dk    rd S dd lddlm}m}m}mmm}  G fddj	                  }
                    | ||           ||           ||                    }	                     d          j        }n#  Y d S xY w|g|_        ||_        d                    dz             }	 fd	}
 | ||
          |	          r't!          j                            |	                    S d S )
Nr   Linux)c_charc_intc_size_tc_void_pc_char_pPOINTERc                        e Zd Zdfd fgZdS )8_find_already_mmapped_dylib_on_linux.<locals>.DlPhdrInfo	dlpi_addr	dlpi_nameN)__name__
__module____qualname___fields_)r   r   s   V/var/www/html/ai-engine/env/lib/python3.11/site-packages/triton/backends/amd/driver.py
DlPhdrInfor      s&        (#(#
    r   z	libc.so.6i      c           
          | j         j        }t          t          j        |                    }|j        v r4                    ||t          t          |                               dS dS )Nr   r   )	contentsr   r   osfsdecodenamememmoveminlen)infosizedatar   pctypeslib_namemax_path_lengths        r   callbackz6_find_already_mmapped_dylib_on_linux.<locals>.callback2   sa    M+	Y''((qvNN4CY,P,PQQQ1qr   )platformsystemr)   r   r   r   r   r   r   	Structure	CFUNCTYPECDLLdl_iterate_phdrargtypesrestypecreate_string_bufferr   r    	string_at)r*   r-   r   r   r   r   r   
callback_tr2   pathr,   r   r   r)   r+   s   `          @@@@r   $_find_already_mmapped_dylib_on_linuxr9      s   OOOG##t
 MMMKKKKKKKKKKKKKKKK
 
 
 
 
 
 
 
V% 
 
 
 !!%)<)<ggh>O>OQXQXY_Q`Q`aaJ ++k22Btt *H5O#OO&&':;;D       zz(++T22 3{6++D112224s   B B"c                  x   dt          j        d          } | rK|                               r!t           j                            |           r| S t          d|  d           t                    }|r6t           j                            |          r|S t          d| d           g }dd l}|                                }|	                                }|j
        r|g|z   }|D ]\}t           j                            |dd	          }t           j                            |          r|c S |                    |           ]t          j        d
          }|rp|                    d          D ]Z}t           j                            |          }	t           j                            |	          r|	c S |                    |	           [t          j        ddg                                          }
fd|
                                D             }|D ]:}t           j                            |          r|c S |                    |           ;t           j                            d          }t           j                            |          r|S |                    |           t          d d|           )Nzlibamdhip64.soTRITON_LIBHIP_PATHzTRITON_LIBHIP_PATH 'z' does not point to a valid zmemory mapped 'z'' in process does not point to a valid r   torchlibLD_LIBRARY_PATH:z/sbin/ldconfigz-pc                     g | ]C}|                                                               )|                                d          DS ))stripendswithsplit).0liner*   s     r   
<listcomp>z2_get_path_to_hip_runtime_dylib.<locals>.<listcomp>p   sB    ^^^djjll>S>ST\>]>]^DJJLL^^^r   z/opt/rocm/lib/zcannot locate z after attempted paths )r   getenvrC   r8   existsRuntimeErrorr9   sitegetsitepackagesgetusersitepackagesENABLE_USER_SITEjoinappendrD   
subprocesscheck_outputdecode
splitlines)env_libhip_pathmmapped_pathpathsrK   site_packages	user_siter8   env_ld_library_pathdflibslocsloccommon_install_pathr*   s                 @r   _get_path_to_hip_runtime_dylibra   @   s   H i 455O k##H-- 	#"'..2Q2Q 	#""i/ii_giijjj 8AAL n7>>,'' 	 l\llbjllmmmEKKK ((**M((**I 4"m3  w||D'5(;;7>>$ 	KKKT )$566 $**3// 	 	AQ))Aw~~a   LLOOOO "$4d#;<<CCEED _^^^):):^^^D  7>># 	JJJS ',,'7BB	w~~)** #""	LL$%%%
PPPPP
Q
QQr   c           	      \   t          j        |                     d                                                    }t	          |          }|                    | d          }|t          j                    5 }t          j	        
                    |d          }t          |d          5 }|                    |            d d d            n# 1 swxY w Y   t          |||g t          g           }t          |d          5 }|                    |                                | dd          }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   dd l}	|	j                            ||          }
|	j                            |
          }|
j                            |           |S )	Nzutf-8z.sozmain.cwrbT)binaryr   )hashlibsha256encode	hexdigestr   get_filetempfileTemporaryDirectoryr   r8   rO   openwriter   include_dirputreadimportlib.utilutilspec_from_file_locationmodule_from_specloaderexec_module)srcr!   keycache
cache_pathtmpdirsrc_pathr\   so	importlibspecmods               r   compile_module_from_srcr      sK   
.G,,
-
-
7
7
9
9Cc""E4--J(** 	Lfw||FH55Hh$$               hKDDBb$ L1"YYqvvxxDdYKK
L L L L L L L L L L L L L L L	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L >11$
CCD
.
)
)$
/
/CKC   JsZ   61E'C	=E	C	EC	,E=.D7+E7D;	;E>D;	?EEEc                   $     e Zd Z fdZd Z xZS )HIPUtilsc                     t          | d          s-t          t          |                               |           | _        | j        S )Ninstance)hasattrsuperr   __new__r   )cls	__class__s    r   r   zHIPUtils.__new__   s<    sJ'' 	= 3//77<<CL|r   c                 (   t                      }t          t          j                            t
          d                                                    }|                    d|d          }t          |d          }|j	        | _	        |j
        | _
        d S )Nzdriver.cz/*py_libhip_search_path*/r   	hip_utils)ra   r   r   r8   rO   dirname	read_textreplacer   load_binaryget_device_properties)selflibhip_pathrx   r   s       r   __init__zHIPUtils.__init__   sx    46627<<4455??AA kk5{AFF%c;77?%(%>"""r   )r   r   r   r   r   __classcell__r   s   @r   r   r      sG            
	? 	? 	? 	? 	? 	? 	?r   r   c                 N    | d         dk    rdS ddddddd	d
dddddddd|          S )Nr   *hipDeviceptr_tint32_tint8_tint16_tint64_tuint32_tuint8_tuint16_tuint64_tfloatdoublei1i8i16i32i64u1u8u16u32u64fp16bf16fp32f32fp64 tys    r   	ty_to_cppr      s[    	!u||   	!
 
r   c                     t          |          }d                    d |                                D                       }d d d                    fd|                                D                       }d|z   }t          |          dk    r4dd                    d	 |                                D                       z   nd}t	                      }	 fd
|                                D             }
d|	 dt          |          dk    rd|z   nd dd                    d |
D                        d| dd                    fd|                                D                        d| d| dd                    d |                                D                        dt          |          dk    r4dd                    d |                                D                       z   nd d}|S )N, c              3   D   K   | ]\  }}t          |           d | V  dS )z argN)r   rE   ir   s      r   	<genexpr>z make_launcher.<locals>.<genexpr>   s:      SS2Yr]]3333SSSSSSr   c                 N    | d         dk    rdS ddddddd	d
dddddddd|          S )Nr   r   	PyObject*r   r   r   r   r   r   r   r   r   r   r   r   r   s    r   _extracted_typez&make_launcher.<locals>._extracted_type   sZ    a5C<<;
 
  ! 	r   c                 ,    dddddddddd	d
dd|          S )NOr\   r[   lbhr   BHIK)r   r   r   longr   r   r   r   r   r   r   r   r   r   s    r   	format_ofz make_launcher.<locals>.format_of   s?    
 
  	r    c                 8    g | ]}  |                    S r   r   )rE   r   r   r   s     r   rG   z!make_launcher.<locals>.<listcomp>   s-    WWWb99__R%8%899WWWr   	iiiKKOOOOr   c              3   &   K   | ]\  }}d | V  dS )z&_argNr   r   s      r   r   z make_launcher.<locals>.<genexpr>   s,       L LB L L L L L Lr   c                     g | ]}|v|	S r   r   )rE   r   	constantss     r   rG   z!make_launcher.<locals>.<listcomp>   s#    @@@AQi-?-?a-?-?-?r   a;  
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <dlfcn.h>

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {"a  "};

// The list of HIP dynamic library symbols and their signature we are interested
// in this file.
#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN)                     \
  FOR_EACH_STR_FN(hipGetErrorString, hipError_t hipError)                     \
  FOR_EACH_ERR_FN(hipModuleLaunchKernel, hipFunction_t f,                     \
                  unsigned int gridDimX, unsigned int gridDimY,               \
                  unsigned int gridDimZ, unsigned int blockDimX,              \
                  unsigned int blockDimY, unsigned int blockDimZ,             \
                  unsigned int sharedMemBytes, hipStream_t stream,            \
                  void **kernelParams, void **extra)                          \
  FOR_EACH_ERR_FN(hipPointerGetAttribute, void *data,                         \
                  hipPointer_attribute attribute, hipDeviceptr_t ptr)

// The HIP symbol table for holding resolved dynamic library symbols.
struct HIPSymbolTable {
#define DEFINE_EACH_ERR_FIELD(hipSymbolName, ...)                             \
  hipError_t (*hipSymbolName)(__VA_ARGS__);
#define DEFINE_EACH_STR_FIELD(hipSymbolName, ...)                             \
  const char *(*hipSymbolName)(__VA_ARGS__);

  HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
};

static struct HIPSymbolTable hipSymbolTable;

bool initSymbolTable() {
  // Use the HIP runtime library loaded into the existing process if it exits.
  void *lib = dlopen("libamdhip64.so", RTLD_NOLOAD);
  if (lib) {
    // printf("[triton] chosen loaded libamdhip64.so in the process\n");
  }

  // Otherwise, go through the list of search paths to dlopen the first HIP
  // driver library.
  if (!lib) {
    int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
    for (int i = 0; i < n; ++i) {
      void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
      if (handle) {
        lib = handle;
        // printf("[triton] chosen %s\n", hipLibSearchPaths[i]);
      }
    }
  }
  if (!lib) {
    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
    return false;
  }

  // Resolve all symbols we are interested in.
  dlerror(); // Clear existing errors
  const char *error = NULL;
#define QUERY_EACH_FN(hipSymbolName, ...)                                     \
  *(void **)&hipSymbolTable.hipSymbolName = dlsym(lib, #hipSymbolName);       \
  error = dlerror();                                                          \
  if (error) {                                                               \
    PyErr_SetString(PyExc_RuntimeError,                                       \
                    "cannot query " #hipSymbolName " from libamdhip64.so");   \
    dlclose(lib);                                                             \
    return false;                                                             \
  }

  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)

  return true;
}

static inline void gpuAssert(hipError_t code, const char *file, int line)
{
   if (code != HIP_SUCCESS)
   {
      const char* prefix = "Triton Error [HIP]: ";
       const char* str = hipSymbolTable.hipGetErrorString(code);
      char err[1024] = {0};
      snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
      PyErr_SetString(PyExc_RuntimeError, err);
   }
}

#define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, hipStream_t stream, hipFunction_t functionz>) {
  // printf("_launch hip kernel\n");
  void *params[] = { c              3       K   | ]	}d | V  
dS )z&argNr   )rE   r   s     r   r   z make_launcher.<locals>.<genexpr>M  s(       < < < < < < < <r   zw };
  if (gridX*gridY*gridZ > 0) {
      HIP_CHECK(hipSymbolTable.hipModuleLaunchKernel(function, gridX, gridY, gridZ, aw  *num_warps, 1, 1, shared_memory, stream, params, 0));
    }
  }

typedef struct _DevicePtrInfo {
    hipDeviceptr_t dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == hipErrorInvalidValue) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
    Py_DECREF(ret);
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  return ptr_info;
}

static PyObject* launch(PyObject* self, PyObject* args) {
   // printf("launch\n");
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
   c                 8    g | ]\  }} |           d | dS )z _arg; r   )rE   r   r   r   s      r   rG   z!make_launcher.<locals>.<listcomp>  s8    OOOEAr##//!///OOOr   z
  if(!PyArg_ParseTuple(args, "z", &gridX, &gridY, &gridZ, &_stream, &_function,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hook a=  )) {
    return NULL;
  }

  // extract kernel metadata
  int num_warps, num_ctas, shared_memory, clusterDimX, clusterDimY, clusterDimZ;
  if (!PyArg_ParseTuple(kernel_metadata, "iiiiii", &num_warps, &num_ctas, &shared_memory, &clusterDimX, &clusterDimY, &clusterDimZ)) {
    return NULL;
  }
  // extract launch metadata
  if (launch_enter_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_enter_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }


  // raise exception asap
  r   c                 N    g | ]"\  }}|d          dk    rd| d| d| d| d	nd#S )r   r   zDevicePtrInfo ptr_infoz = getPointer(_argr   z); if (!ptr_infoz.valid) return NULL;r   r   r   s      r   rG   z!make_launcher.<locals>.<listcomp>  s~      d  d  d  IN  IJ  LNoqrsotx{o{o{kqkkAkkkkTUkkkk  BD  d  d  dr   z;
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, clusterDimX, clusterDimY, clusterDimZ, shared_memory, (hipStream_t)_stream, (hipFunction_t)_functionc              3   J   K   | ]\  }}|d          dk    rd| dnd| V  dS )r   r   ptr_infoz.dev_ptr_argNr   r   s      r   r   z make_launcher.<locals>.<genexpr>  s        j~  j~  ch  cd  fh  EG  HI  EJ  LO  EO  EO  kA  vw  kA  kA  kA  kA  U_  \]  U_  U_  j~  j~  j~  j~  j~  j~r   an  );

  if(launch_exit_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_exit_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }

  if(PyErr_Occurred()) {
    return NULL;
  }
  // return None
  Py_INCREF(Py_None);
  return Py_None;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  if (!initSymbolTable()) {
    return NULL;
  }
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)r$   rO   itemsvaluesra   keys)r   	signatureids	warp_size
start_desc	arg_declsargs_formatformat	args_listr   paramsrx   r   r   s   `           @@r   make_launcherr      sm   YJ		SSARARSSSSSI  *    ''WWWWWIDTDTDVDVWWWXXK;&FPST]P^P^abPbPbtyy L L)//:K:K L L LLLLLhjI022K A@@@))@@@F^ .9^ ^z X[  \e  Xf  Xf  ij  Xj  Xj  DH  KT  DT  DT  pr{^ ^~ yy < <V < < <<<^ ^B V_C^ ^x 88OOOOY__=N=NOOOPPy^ ^z !'{^ ^~ S\^ ^f 99  d  d  R[  Ra  Ra  Rc  Rc  d  d  d  e  eg^ ^h BE  FO  BP  BP  ST  BT  BT  Z^  ae  aj  aj  j~  j~  lu  l{  l{  l}  l}  j~  j~  j~  a~  a~  Z~  Z~  Z\i^ ^ ^C~ Jr   c                       e Zd Zd Zd ZdS )HIPLauncherc                    dt          d          rj        j        nt                      i}t          d          rj        nt                      }fdfd|                                D             }fdj                                        D             }t          ||||j	                  t          d          }|j        | _        d S )Nids_of_const_exprsfnr   c                 p    t          | t                    rj        j                            |           n| S N)
isinstancestrr   	arg_namesindex)r   rx   s    r   <lambda>z&HIPLauncher.__init__.<locals>.<lambda>  s/    As9K9KRCF,221555QR r   c                 .    i | ]\  }} |          |S r   r   rE   ry   valuecst_keys      r   
<dictcomp>z(HIPLauncher.__init__.<locals>.<dictcomp>  s'    MMMZS%WWS\\5MMMr   c                 .    i | ]\  }} |          |S r   r   r   s      r   r   z(HIPLauncher.__init__.<locals>.<dictcomp>  s'    QQQZS%WWS\\5QQQr   __triton_launcher)r   r   
constexprstupler   dictr   r   r   r   r   launch)r   rx   metadatar   r   r   r   r   s    `     @r   r   zHIPLauncher.__init__  s    #'#t:L:L%YSV%6%6RWRYRYZ%,S+%>%>JCMMDFF	RRRRMMMM9??;L;LMMM	QQQQ3=;N;N;P;PQQQ	Iy#x7IJJ%c+>??jr   c                       | j         |i | d S r   )r   )r   argskwargss      r   __call__zHIPLauncher.__call__  s    T$V$$$$$r   N)r   r   r   r   r  r   r   r   r   r     s2        ! ! !% % % % %r   r   c                   :     e Zd Z fdZed             Zd Z xZS )	HIPDriverc                     t                                                       t                      | _        t          | _        d S r   )r   r   r   utilsr   launcher_cls)r   r   s    r   r   zHIPDriver.__init__  s2    ZZ
'r   c                  &    dd l } | j        j        d uS )Nr   )r<   versionhip)r<   s    r   	is_activezHIPDriver.is_active  s    } ,,r   c                     |                                  }| j                            |          }|d         }|d         }t          d|                    d          d         |          S )NarchwarpSizer  r?   r   )get_current_devicer  r   r   rD   )r   devicedevice_propertiesr  r   s        r   get_current_targetzHIPDriver.get_current_target  s^    ((** J<<VDD (%j1	

3 2I>>>r   )r   r   r   r   staticmethodr  r  r   r   s   @r   r  r    sb        ( ( ( ( (
 - - \-? ? ? ? ? ? ?r   r  )	functoolsr   rf   rQ   rk   pathlibr   triton.runtime.buildr   triton.runtime.cacher   triton.backends.compilerr   triton.backends.driverr   r8   r   realpath__file__rO   ro   r9   	lru_cachera   r   objectr   r   r   r   r  r   r   r   <module>r     s       				             ' ' ' ' ' ' 2 2 2 2 2 2 . . . . . . , , , , , ,
'//"'**844
5
5w||GY//0- - -` ;R ;R ;R|  &? ? ? ? ?v ? ? ?(
 
 
,Q Q Qh% % % % %& % % % ? ? ? ? ?	 ? ? ? ? ?r   