
     NgGX                     N   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZ ej        e          Z G d	 d
e          Z G d de          Z  G d d          Z!dej"        iZ#ddddddi fdZ$d*dZ%d+dZ&d Z'd Z(d Z)d Z*d,dZ+ej,        d fdZ-d Z.d-dZ/deeee0ef                           fd Z1 G d! d"e          Z2 G d# d$e2          Z3 G d% d&e2          Z4d.d(Z5d) Z6dS )/    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)AnyDictListOptional)versionc                   $    e Zd ZdZdZdZdZd ZdS )	Precisionfp32fp16int8int4c                     | j         S Nvalueselfs    e/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/benchmark_helper.py__str__zPrecision.__str__&   
    z    N)__name__
__module____qualname__FLOAT32FLOAT16INT8INT4r    r   r   r   r       s7        GGDD    r   r   c                        e Zd ZdZdZdZd ZdS )OptimizerInfono_optby_ort	by_scriptc                     | j         S r   r   r   s    r   r   zOptimizerInfo.__str__1   r   r   N)r   r   r    NOOPTBYORTBYSCRIPTr   r%   r   r   r'   r'   *   s4         EEH    r   r'   c                        e Zd Zd Zd Zd ZdS )ConfigModifierc                     || _         d S r   
num_layers)r   r3   s     r   __init__zConfigModifier.__init__6   s    $r   c                    | j         d S t          |d          r.| j         |_        t                              d| j                     t          |d          r.| j         |_        t                              d| j                     t          |d          r0| j         |_        t                              d| j                     d S d S )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r3   hasattrr6   loggerinfor7   decoder_layers)r   configs     r   modifyzConfigModifier.modify9   s    ?"F6.// 	d'+F$KKbQUQ`bbccc6+,, 	e$(OF!KKcRVRaccddd6,-- 	e$(OF!KKcRVRaccddddd	e 	er   c                     | j         S r   r2   r   s    r   get_layer_numzConfigModifier.get_layer_numF   s
    r   N)r   r   r    r4   r=   r?   r%   r   r   r0   r0   5   sD        % % %e e e    r   r0   float32TFc	                    d }		 t          j                    }
|rt           j        j        |
_        nt           j        j        |
_        |rd|
_        |dk    r)||
_        t          	                    d|
j                    |rd|
_
        nd|
_
        t          	                    d|             |r<|dk    rddg}n4|d	k    rd
dg}n)|dk    rg d}n|dk    rddg}n|dk    rg d}nddg}ndg}rfd|D             }|r|
                    dd           t          j        | |
|          }	n,# t          $ r t                              dd           Y nw xY w|	S )NTr   z%Session option: intra_op_num_threads=   zCreate session for onnx model: dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderrH   rF   cudaCUDAExecutionProvidertensorrt)TensorrtExecutionProviderrL   rF   c                 0    g | ]}|v r
||         fn|S r%   r%   ).0nameprovider_optionss     r   
<listcomp>z.create_onnxruntime_session.<locals>.<listcomp>   s6    tttbf4CS;S;S$ 0 677Y]tttr   z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providers	Exception)exc_info)onnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_ENABLE_BASICenable_profilingintra_op_num_threadsr9   debuglog_severity_leveladd_session_config_entryInferenceSessionrV   error)onnx_model_pathuse_gpuproviderenable_all_optimizationnum_threadsr^   verbose(enable_mlas_gemm_fastmath_arm64_bfloat16rR   sessionsess_optionsrU   s           `   r   create_onnxruntime_sessionrn   P   s    G51"133" 	h4?4V4eL114?4V4gL1 	1,0L)??0;L-LLdAbddeee 	0./L++./L+HHHIII 	15  35KL		V##46LM		Z''  		
 V##46LM		Z''  		 56LM		/0I 	uttttjstttI3 	c112\^abbb.Xabbb 1 1 1[4000001 Ns   D(D. .&EEc                     | rt          j        dd           d S t          j        d           t          j        d                              t          j                   d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rr   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)rj   s    r   setup_loggerrz      sr     DJ	
 	
 	
 	
 	
 	

 	.....))227?CCCCCr   c                    | r3t           j                            |           st          j        |            |r3t           j                            |          st          j        |           |re|dk    r dt	          j                    v s
J d            n?t          t	          j                                                  g d          r
J d            t          	                    dt          j                    t          	                    dt          j                    t          	                    dt          j                    t          j        t          j                  t          j        d	          k    sJ t          j        t          j                  t          j        d
          k    sJ t          j        t          j                  t          j        d	          k    sJ d S )NrD   rE   zBPlease install onnxruntime-directml package to test GPU inference.)rL   rH   rJ   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsrX   get_available_providersset
isdisjointr9   r:   torch__version__rs   r   parse)	cache_dir
output_dirrf   rg   s       r   prepare_environmentr      s    	22 
I  "'..44  
J 	iu&+*M*O*OOOOS POOO ;>@@AALL___  i ihi i i KK65#466777
KKB(@BBCCC
KK@{'>@@AAA =*++w}X/F/FFFFF=122gmH6M6MMMMM=011W]85L5LLLLLLLr   c                    t          |           t          t          |                     z  dz  }t          j        | t          j                  dz  }|d|z  z  }t          |           |dt          j        | d          dz  dt          j        | d          dz  dt          j        | d          dz  d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarfloat64
percentile)latency_list
batch_size
latency_msr   
throughputs        r   get_latency_resultr      s    \""U3|+<+<%=%==FJyU]CCCfLv
23J ,''/55$)$4\2$F$F$O!U!U$)$4\2$F$F$O!U!U$)$4\2$F$F$O!U!U!+11""  r   c                 (   t          |ddd          5 }g d}t          j        ||          }|                                 | D ]}|                    |           	 d d d            n# 1 swxY w Y   t
                              d|            d S )Na asciimodenewlineencoding)enginer   rU   device	precision	optimizer
io_binding
model_nameinputsthreadsr   sequence_lengthcustom_layer_numr   r   r   r   r   r   r   r   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr9   r:   )resultscsv_filenamecsv_filecolumn_names
csv_writerresults         r   output_detailsr      s    	lb7	C	C	C (x
 
 
0 ^HFFF
    	( 	(F''''	(7( ( ( ( ( ( ( ( ( ( ( ( ( ( (< KKGGGHHHHHs   A	A**A.1A.c                     t          |ddd          5 }g dg }|j        D ]L}|j        dgk    r|                    d|            '|j        D ]}|                    d| d|            Mt	          j        ||z             }|                                 |j        D ]#}d	D ]}	|j        D ]}
d
D ]	}|j	        D ]}i }| D ]}|d         |k    r|d         |	k    r|d         |
k    r|d         |k    r|d         |k    rfd|
                                D             }|s5|                    |           |                    d |D                        nD ]}||         ||         k    sJ |d         }|d         }|r|d         |d| d| <   |d         |d| <   |r|                    |           %	 d d d            n# 1 swxY w Y   t                              d|            d S )Nr   r   r   r   )r   r   r   r   r   rU   r   r   r   r   r   b_sr   )         )TFr   r   r   r   r   r   c                 $    i | ]\  }}|v 	||S r%   r%   )rP   kvheader_namess      r   
<dictcomp>z"output_summary.<locals>.<dictcomp>  s+    .d.d.d1RSWcRcRcq!RcRcRcr   c                     i | ]}|d S )r   r%   )rP   r   s     r   r   z"output_summary.<locals>.<dictcomp>  s    3N3N3NaAr3N3N3Nr   r   r   r   z'Summary results are saved to csv file: )r   batch_sizessequence_lengthsappendr   r   r   modelsenginesri   itemsupdater   r9   r:   )r   r   argsr   
data_namesr   r   r   r   input_countengine_namer   r   rowr   headersr   r   sr   s                      @r   output_summaryr      s)   	lb7	C	C	C 49x
 
 
 
* 	K 	KJ$,,!!"2j"2"23333'+'< K KO%%&I*&I&I&I&IJJJJK ^H
9RSSS
   + 	9 	9J( 9 9#'< 9 9K&7 9 9
'+'7 9 9G"$C*1 T T$*<$8J$F$F(.x(8K(G(G(.x(8K(G(G(.|(<
(J(J(.y(9W(D(D.d.d.d.d.d.d.dG+. %H(+

7(;(;(;(+

3N3N:3N3N3N(O(O(O(O1= )H )HA36q6WQZ3G3G3G3G3G(.|(<A(./@(AA'( %T<BCW<XLLLQLL(9(97=>R7SGGG" 9 * 3 3C 8 8 819999	9149 49 49 49 49 49 49 49 49 49 49 49 49 49 49l KKH,HHIIIIIs   F4GGGc           
         t          |ddd          5 }ddddgt          t          t          |                                                                                               }t          j        ||	          }|                                 | D ]{}t          t          j                              | |         d<   t          j        | |         d<   t          j        | |         d<   || |         d<   |                    | |                    |	 d d d            n# 1 swxY w Y   t                               d
|            d S )Nr   r   r   r   model_filenamer   rs   r   r   z(Fusion statistics is saved to csv file: )r   listnextitervalueskeysr   r   r   strr   nowrs   r   r   r   r9   r:   )model_fusion_statisticsr   r   r   r   keys         r   output_fusion_statisticsr   )  s   	lb7	C	C	C >x	

 $t3::<<==>>CCEEFF
 ^HFFF
   * 	> 	>C7:8<>>7J7J#C(4;G;S#C(8494E#C(1=@#C()9: 7 <====	>> > > > > > > > > > > > > > >  KKI<IIJJJJJs   C<DD!$D!c                     i }t          j         fdd|           t          j         fdd|          }|                    |           |                    ddi           |                    t          ||                     |S )Nc                  0                         d            S r   run
ort_inputsort_sessions   r   <lambda>zinference_ort.<locals>.<lambda>?  s    +//$
;; r   r   numberrepeatc                  0                         d            S r   r   r   s   r   r   zinference_ort.<locals>.<lambda>@  s    z)J)J r   r   F)timeitr   r   r   )r   r   result_templaterepeat_timesr   warm_up_repeatr   r   s   ``      r   inference_ortr   =  s    F
M;;;;;An]]]]=!J!J!J!J!JST]ijjjL
MM/"""
MM<'(((
MM$\:>>???Mr   c           
          i }                                  |D ]}t          j        ||                                       |	          }t                              t          ||         j                  |
          }                    ||j	        j
        d||j        |                                           t          |          dk    rt          |||	           t          |          D ]\\  }}                    |||         j	        j
        dt"          j        ||         j        ||                                                    ]t'          j         fdd|           t'          j         fdd|          }|                    |           |                    ddi           |                    t-          ||                     |S )Nr   c                  .                                    S r   run_with_iobindingr   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>t      ..z:: r   r   r   c                  .                                    S r   r   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>z  r   r   r   T)r   r   
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r   
bind_inputr   typeshapedata_ptrr   allocateOutputBuffers	enumeratebind_outputr   r@   r   r   r   r   )r   r   r   r   ort_output_namesort_outputsoutput_buffersoutput_buffer_max_sizesr   r   	data_typer   r   rQ   np_input
input_typeiort_output_namer   r   s   `                  @r   inference_ort_with_io_bindingr  G  s    F ''))J 

 

#Jt$45588@@-11#j6F6L2M2MyYY
O N	
 	
 	
 	
 >an.EvNNN'(899 
 
?1$)MN 1&&((	
 	
 	
 	
 M:::::    =:::::  L
 MM/"""
MM<&'''
MM$\:>>???Mr   c                 x    |D ]6}|                      t          j        |t          j        |                     7d S )N)r   r   )r   r   emptyr@   )r  r  r   r  s       r   r  r    sK     % R Rek!5=PPPQQQQR Rr   {   c                    t          j        |            t          j                             |            t          j        |            t          j                            |            t          j                            |            dS )z5Set random seed manually to get deterministic resultsN)randomseedr   r   manual_seedrK   manual_seed_all)r  s    r   set_random_seedr    sk    
K	Ld	d	J4   	Jt$$$$$r   returnc            	         ddl m} m}m}m}m}m}m} 	  |             g } |            }t          |t                    sd S t          |          D ]j}	 | ||	                    }
t          |
t                    r d S |                    |	 | ||	                    |
j        |
j        |
j        d           k |             |S # | $ r}t!          d|           Y d }~d S d }~ww xY w)Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idrQ   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr!  r"  r#  r$  r%  r&  r'  
isinstanceintranger   r   r)  r*  r+  print)r!  r"  r#  r$  r%  r&  r'  r   device_countr  r:   rd   s               r   get_gpu_infor3    s                    


))++,,, 	4|$$ 	 	A**+E+Ea+H+HIID$$$ ttMM--.H.H.K.KLL!Z I I     	   =uEEEttttts$   +C ;C >AC C'C""C'c                   `    e Zd ZddZd Zedeeee	e
f                           fd            ZdS )MemoryMonitorTc                     || _         d S r   )keep_measuring)r   r7  s     r   r4   zMemoryMonitor.__init__  s    ,r   c                     dd l }d}	 t          ||                    t          j                                                              j        dz            }t          d           | j        snf|S )Nr   T   {Gzt?)	psutilmaxProcessr|   getpidmemory_inforssr   r7  )r   r;  	max_usages      r   measure_cpu_usagezMemoryMonitor.measure_cpu_usage  sq    		Iv~~bikk'B'B'N'N'P'P'TW^'^__I%LLL& 		
 r   r  c                     t                      r   )NotImplementedErrorr   s    r   measure_gpu_usagezMemoryMonitor.measure_gpu_usage  s    !###r   NT)r   r   r    r4   rB  r   r   r   r
   r   r	   rE  r%   r   r   r5  r5    sp        - - - -	 	 	 $8Dc3h,@#A $ $ $ ^$ $ $r   r5  c                   T     e Zd Zd fd	Zdeeeeef                           fdZ	 xZ
S )CudaMemoryMonitorTc                 J    t                                          |           d S r   )superr4   )r   r7  	__class__s     r   r4   zCudaMemoryMonitor.__init__  s!    (((((r   r  c                   
 ddl m}m}mm}mm}m} g g 
	  |              |            }t          |t                    st                              d|            d S d t          |          D             fdt          |          D             
	 t          |          D ]l} | |                    }t          |t                    r t                              d|             d S t          |         |j        dz            |<   mt!          d	           | j        sn |             
fd
t          |          D             S # |$ r&}	t                              d|	           Y d }	~	d S d }	~	ww xY w)Nr   r   z*nvmlDeviceGetCount result is not integer: c                     g | ]}d S r   r%   rP   r  s     r   rS   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    <<<1Q<<<r   c                 8    g | ]}  |                    S r%   r%   )rP   r  r#  r%  s     r   rS   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s1    fffQ))*D*DQ*G*GHHfffr   Tz%nvmlDeviceGetMemoryInfo returns str: r9  r:  c                 4    g | ]}||         |         d S )	device_idrQ   max_used_MBr%   rP   r  gpu_namemax_gpu_usages     r   rS   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  sD        	 "#$QK#0#3   r   r,  )r-  r!  r"  r#  r$  r%  r&  r'  r.  r/  r9   rd   r0  r   r<  r+  r   r7  )r   r!  r"  r$  r&  r'  r2  r  r:   rd   rV  rW  r#  r%  s             @@@@r   rE  z#CudaMemoryMonitor.measure_gpu_usage  s6   	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	HJJJ--//LlC00 X,XXYYYt<<l(;(;<<<MfffffRWXdReRefffH	|,, R RA223M3Ma3P3PQQD!$,, $%ST%S%STTT#tt'*=+;TY=P'Q'QM!$$e* 	 LNNN     |,,     	 	 	LLH%PPP44444	s&   AE $BE 4AE E?E::E?rF  )r   r   r    r4   r   r   r
   r   r	   rE  __classcell__rK  s   @r   rH  rH    sj        ) ) ) ) ) )+8Dc3h,@#A + + + + + + + +r   rH  c                   ,     e Zd Zd fd	Zd Zd Z xZS )RocmMemoryMonitorTc                 `   t                                          |           d}t          j                            |          r-|t
          j        vrt
          j                            |           	 dd l}|| _        | j                                         d S # t          $ r d | _        Y d S w xY w)Nz/opt/rocm/libexec/rocm_smir   )
rJ  r4   r|   r}   r~   sysr   rocm_smiinitializeRsmiImportError)r   r7  rocm_smi_pathr^  rK  s       r   r4   zRocmMemoryMonitor.__init__  s    (((47>>-(( 	/CH,,...	!OOO$DMM((***** 	! 	! 	! DMMMM	!s   2$B B-,B-c                 b    | j         dS | j                             |d          d         dz  dz  S )NrA   VRAMr   i   )r^  
getMemInfo)r   devs     r   get_used_memoryz!RocmMemoryMonitor.get_used_memory  s6    = 2}''V44Q7$>EEr   c                    | j         d S | j         &t          | j                                                   nd}d t          |          D             d t          |          D             	 t          |          D ].}t	          |         |                     |                    |<   /t          j        d           | j        sn[fdt          |          D             S )Nr   c                     g | ]}d S rN  r%   rO  s     r   rS   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    888q888r   c                     g | ]}d | S )GPUr%   rO  s     r   rS   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    ;;;!I!II;;;r   Tr:  c                 4    g | ]}||         |         d S rR  r%   rU  s     r   rS   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>%  sD     
 
 
 	  ,Q/ 
 
 
r   )	r^  r   listDevicesr0  r<  rf  timer   r7  )r   r2  r  rV  rW  s      @@r   rE  z#RocmMemoryMonitor.measure_gpu_usage  s   = 4;?=;Ts4=4466777Z[88E,$7$7888;;u\':':;;;	<(( R R#&}Q'79M9Ma9P9P#Q#Qa  Ju& 	
 
 
 
 
 <((
 
 
 	
r   rF  )r   r   r    r4   rf  rE  rX  rY  s   @r   r[  r[    s^        ! ! ! ! ! !F F F

 
 
 
 
 
 
r   r[  rK   c                 l   d }|dk    rt           }nt          } |d          }| r||}n|                                }|d S ||S t                      5 } |            }|                    |j                  }	 |                    |          }	|	                                }
d|_        |                                }n # d|_        |                                }w xY w|	 d d d            d S t                              d| d|            t          |          dk    rt          |          dk    rpt          |          t          |          k    rPd}t          |          D ]0\  }}|d         }||         d         }||z
  }t          ||          }1|cd d d            S d d d            n# 1 swxY w Y   d S ||}n|                                }||S t                      5 } |            }|                    |j                  }	 |                    |          }	|	                                }
d|_        |                                }n # d|_        |                                }w xY wt                              d|d	d
|d	d           ||z
  cd d d            S # 1 swxY w Y   d S )NrG   FzGPU memory usage: before=z  peak=r   r   rT  zCPU memory usage: before=z.1fz
 MB, peak=z MB)r[  rH  rE  r   submitr   r7  r9   r:   r   r	  r<  rB  )is_gpufuncmonitor_typestart_memorymemory_monitor_typemonitormemory_before_testexecutor
mem_thread	fn_thread_rA  max_usedr  memory_beforebeforeafterr+  s                     r   measure_memoryr  /  s   v//!!%((G "#!-!(!:!:!<!<%4<%%!! 	 X))++G!)BCCJ0$OOD11	$$&&).&&--//		 */&&--//	//// 	  	  	  	  	  	  	  	  KKZ4FZZyZZ[[[%&&!++I!0C0CL^H_H_cfgpcqcqHqHq(12D(E(E 3 3$A}*=9F%aL7E 6>D"8T22HH-	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 . t )$6688|!!			 .%%''__W%>??
	, --I  ""A%*G""))++II &+G""))++I++++d0BdddR[ddddeee--. . . . . . . . . . . . . . . . . .s[   %F2;)C $F2 CF20B)F22F69F6*%J))I9J)I22*J))J-0J-c                  l    g d} d}| D ]*}t          j        |          }||r|dz  }|| d| z  }+|S )N)ORT_DISABLE_FUSED_ATTENTION!ORT_ENABLE_FUSED_CAUSAL_ATTENTION!ORT_DISABLE_FUSED_CROSS_ATTENTIONORT_DISABLE_TRT_FLASH_ATTENTION&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONORT_TRANSFORMER_OPTIONSORT_CUDA_GEMM_OPTIONSr   ,=)r|   getenv)	env_namesenvrQ   r   s       r   get_ort_environment_variablesr  s  sn      I C ! !	$= 	3JC$     Jr   rF  r   rN  )r  )rK   N)7r   rv   r|   r  r]  rm  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr	   r
   r   r   rt   r   r   rs   	packagingr   rX   rw   r   r9   r   r'   r0   r@   r  rn   rz   r   r   r   r   r   r   longlongr  r  r  r   r3  r5  rH  r[  r  r  r%   r   r   <module>r     s   


  				  



   # # # # # # # # 1 1 1 1 1 1                   , , , , , , , , , , , ,                    		8	$	$           D          , u}   -2C C C CLD D D DM M M M8   I I ID7J 7J 7JtK K K(   * n: : : :zR R R% % % %#htDcN34 # # # #L$ $ $ $ $C $ $ $(/ / / / / / / /d(
 (
 (
 (
 (
 (
 (
 (
VA. A. A. A.H    r   