
     NgO                     p   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZ e	 G d d                      Ze	 G d	 d
                      Z	 	 	 ddZd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$e%dk    rdZ& e$             dS dS )    N)	dataclass)datetime)Path)Optional)generate_test_dataget_bert_inputsc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/bert_perf_test.pyr
   r
   "   s         OOOOOOOOOMMMMMM
IIIMMM          r    r
   c                   x    e Zd ZU eed<   eed<   eed<   eed<   eed<   ee         ed<   ee         ed<   eed<   d	S )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelinput_tuning_resultsoutput_tuning_results	mask_typeN)r   r   r   r   r   r   r   r   r    r!   r#   r#   3   so         OOONNN"3-'''#C=(((NNNNNr    r#      c                 4   dd l }|                    |           |r%d|                                vrt          d           |r<|dk    rddg}n4|dk    rddg}n)|d	k    rg d
}n|dk    rddg}n|dk    rg d}nddg}ndg}|                                }	||	_        |j        j        |	_        ||j	        j
        |	_        ng|dk    r|j	        j        |	_        nO|dk    r|j	        j        |	_        n7|dk    r|j	        j        |	_        n|dk    r|j	        j
        |	_        n||	_        |||	_        |                    | |	|          }
|r|dk    rd|
                                v sJ n|dk    rd|
                                v sJ n|d	k    r1d|
                                v sJ d|
                                v sJ n|dk    rd|
                                v sJ nh|dk    r1d|
                                v sJ d|
                                v sJ n1d|
                                v sJ nd|
                                v sJ |Nt%          |          5 }|
                    t)          j        |                     d d d            n# 1 swxY w Y   |
S )Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr3   r1   cudatensorrt)TensorrtExecutionProviderr.   r1      r,   c   )	providersr5   r8   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r$   r   r   r   rG   r   tuning_results_pathr<   execution_providerssess_optionssessionfs               r!   create_sessionrV   ?   s    ++L999 
+;3V3V3X3XXX N	
 	
 	
  7u#9;Q"R#:<R"S### # #
 #:<R"S### # # $;<R"S56--//L&2L#"-";"JL'0;0R0a--	!Q	&	&0;0R0b--	!Q	&	&0;0R0c--	!Q	&	&0;0R0f--	!R	'	'0;0R0a--0H-',@)**:|Ob*ccG Au)W-B-B-D-DDDDDD*g.C.C.E.EEEEEE##.'2G2G2I2IIIII*g.C.C.E.EEEEEE*g.C.C.E.EEEEEE##.'2G2G2I2IIIII*g.C.C.E.EEEEEE*g.C.C.E.EEEEEE%)>)>)@)@@@@@&%&& 	5!&&ty||444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 Ns   (JJJc                     t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        i}||          S )N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps     r!   
numpy_typera      s:    rzrzRXR[	H Jr    c                     fd|                                  D             }fd|                                 D             }||fS )Nc                 f    i | ]-\  }}|t          j        |                                        .S r   rX   
from_numpyto.0namearraydevices      r!   
<dictcomp>z/create_input_output_tensors.<locals>.<dictcomp>   s8    ```+$T5+E2255f==```r    c                 f    i | ]-\  }}|t          j        |                                        .S r   rd   rg   s      r!   rl   z/create_input_output_tensors.<locals>.<dictcomp>   s8    bbb;4dE,U3366v>>bbbr    )items)inputsoutputsrk   input_tensorsoutput_tensorss     `  r!   create_input_output_tensorsrs      sZ    ````QWQ]Q]Q_Q_```MbbbbRYR_R_RaRabbbN.((r    c           
         |                                  }|                                D ]R\  }}|                    ||j        j        dt          |j                  |j        |                                           S|                                D ]R\  }}|	                    ||j        j        dt          |j                  |j        |                                           S|S Nr   )

io_bindingrn   
bind_inputrk   typera   dtypeshapedata_ptrbind_output)sessrq   rr   rv   ri   tensors         r!   create_io_bindingr      s    ""J%++-- 
 
fMv|$$LOO	
 	
 	
 	
 ',,.. 
 
fMv|$$LOO	
 	
 	
 	
 r    c                 &   g }g }|j         rdnd}t          |          D ]\  }}|                     ||          }	|                    |	           i }
t	          t          |                    D ]}|	|         |
||         <   t          ||
|          \  }}t          | ||          }|                     |           t          j
                    }|                     |           t          j
                    |z
  }|                    |           ||fS )Nr6   cpu)r   	enumeraterunappendrangelenrs   r   run_with_iobindingtimeitdefault_timer)rT   
all_inputsoutput_namestest_settingresultslatency_listrk   _test_case_idro   resultrp   irq   rr   rv   
start_timelatencys                    r!   %onnxruntime_inference_with_io_bindingr      s,   GL#+6VVF!*:!6!6 % %v\622vs<(()) 	1 	1A'-ayGLO$$(CFGU[(\(\%~&w~NN
 	"":...)++
"":...&((:5G$$$$L  r    c                    t          |          dk    r(|                     |t          j        |                     g }g }t	          |          D ]n\  }}t          j                    }|                     ||          }t          j                    |z
  }	|                    |           |                    |	           o||fS ru   )r   r   randomchoicer   r   r   r   )
rT   r   r   r   r   r   ro   r   r   r   s
             r!   onnxruntime_inferencer      s    
:L&-
";";<<<GL!*:!6!6 % %v)++
\622&((:5vG$$$$L  r    c                    |                                 }dt          j                            |            d}|d|j         d|j         d                    dd          z  }|d|j         d|j         dz  }|d	|j	         d
|j
         dz  }|d|j         d|j         dz  }|d|j         dz  }|d|j         z  }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerG   r   replacer   r   r   r   r   r   r   r   )r$   rT   r   rS   options        r!   	to_stringr      s-   ..00L5bg&&z22555F
  L,*O  L  Lgs  hI  L  L  L  T  T%r  F eL3eelFbeeeeF
[L3[[AX[[[[F
]-]]|?Z]]]]F
P)MPPPPF
M(KMMMFMr    c           	          t          | j        |j        |j        || j        |j        | j                  }d |                                D             }t          | j        ||          }||v rt          d|           d S t          d|           g }|j
        rBt          |j                  D ],}	t          ||||          \  }
}|                    |           -n@t          |j                  D ]+}	t          |||          \  }
}|                    |           ,t!          j        |          dz  }t%          j        |          }t!          j        |d          }t!          j        |d          }t!          j        |d          }t!          j        |d	          }t!          j        |d
          }|j        d|z  z  }|||||||f||<   t          d                    t-          |d          t-          |d                               | j        r t0          j                            | j                  }t0          j                            |          rZ|}|                    dd          d          dt;          j                                                     d}t          d|d|d           |                                 }tC          |d          5 }tE          j#        ||           d d d            n# 1 swxY w Y   t          d|           d S d S )N)r   rQ   c                     g | ]	}|j         
S r   )ri   )rh   outputs     r!   
<listcomp>z run_one_test.<locals>.<listcomp>  s    DDDFFKDDDr    zskip duplicated test:zRunning test:  2   K   Z   _   r:   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr9   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$rV   r$   r   r   r(   r   r)   get_outputsr   r?   r   r   r   r   extendr   rZ   rj   
statisticsmean
percentiler   formatr*   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrM   rO   dump)model_settingr   perf_resultsr   r   rT   r   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrU   s                           r!   run_one_testr      s|    !.)>  G EDg.A.A.C.CDDDL
M,g|
D
DC
l%s+++	/3" 	2/00 	2 	2B$I\<% %!G\ ##L1111		2 /00 	2 	2B$9':|$\$\!G\##L1111 *++d2J oj11Oz2..Jz2..Jz2..Jz2..Jz2..J(F_,DEJ 	L 
6==f_V[>\>\^deoqv^w^wxx   * 
9goom&IJJ7>>+&& 	a)O*11'1==a@ff8<>>C[C[C]C]fffK*o/FU_```((**+s## 	qIc1	 	 	 	 	 	 	 	 	 	 	 	 	 	 	*K88888
9 
9s   K11K58K5c                     t          j        t          | ||||f          }|                                 |                                 d S )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processs         r!   launch_testr   <  sP    % 
	 	 	G MMOOOLLNNNNNr    c                    |j         t          | ||||j                    d S t          j        d          }t          j        d          }t	          ||h          }t          dt          d|                    D ]}||vr|                    |           |                    d           |D ]}t          | ||||           d S )NF)logicalTr9      )reverse)	r   r   psutil	cpu_countlistr   minr   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	            r!   run_perf_testsr   K  s   (4-	
 	
 	
 	 ///I$T222MmY7881c"m,,-- ( (%%%$$Q'''4((( 1 a aM<zK_````a ar    c                 P   t          | j        | j        | j        | j                  \  }}}t          d|j         d|j         d|j                    t          |j        |j        |j        |j
        |j        ||||j        |j        | j                  }t          | |||           d S )NzGenerating z samples for batch_size=z sequence_length=)r+   )r   r$   r%   r&   r'   r?   r   r   r   r   r   r   r   r   r+   r   )r   r   r   	input_idssegment_ids
input_maskr   s          r!   run_performancer   c  s    )8 $&%	* *&I{J 
 	Pl-  	P  	P|G^  	P  	Pq}  rN  	P  	P   $$,+)  J =,jIIIIIr    c                     t          j                    } |                     ddt          d           |                     dddt          dd	           |                     d
ddt          d           |                     ddt          dd           |                     dddt          dd           |                     ddt          g ddd           |                     ddt          dd           |                     dddd !           |                     d"           |                     d#dt          d$g d%d&'           |                     d(ddd)!           |                     d*           |                     d+ddd,!           |                     d-           |                     d.dt          d d/           |                     d0d1dt          d d2           |                     d3dt          d d4           |                     d5dt          d d6           |                     d7dt          d d8           |                     d9d t          d:;           |                     d<d t          d=;           |                     d>d?d@t          dA;           |                     dBdCdddD!           |                     dE           |                     dFdt          d$dG           |                                 }|S )HNz--modelTzbert onnx model path)requiredrx   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rx   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rx   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r9   r,   r:   r:   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rx   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr,   )r   r9   r,   r      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rx   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rx   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   s     r!   parse_argumentsr     s&   $&&F
	DsAWXXX
Z     /     0     Y     i     _     (	     &&&
=     eLyYYY
&&&
*U<Vfggg
u---
(      ,     '     )     ,      B	     !@	     #8     "B     u555
|     DKr    c                     t                      } | j        dk    r*t          dt          d| j        z                      | _        | j        dk    r| j        | _        t          j                    }|	                                }t          | j                  }t          |          dk    rt          |          dk    st          d          t          | j        | j        | j        | j        | j        | j        | j        | j                  }|D ]z}t/          || j        | j        | j        | j        | j        | j        | j        | j        | j        | j        | j        | j                  }tA          d|           tC          |||           {tE          |#                                dd 	          }tH          j%        &                    tO          | j                  j(        d
)                    | j        rdndd&                    d tE          tU          |                    D                       | j        tW          j,                    -                    d                              }t]          |dd          5 }	t_          j0        |	dd          }
d }|D ]\  }}|1                    d          }|8g d}|2                    d |D                        |
3                    |           d |D             }|2                    d |D                        |
3                    |           	 d d d            n# 1 swxY w Y   tA          d|           d S )Nr   r9   r      z batch_size not in range [1, 128]ztest settingFc                     | d         S )Nr9   r   )xs    r!   <lambda>zmain.<locals>.<lambda>R  s    qQRt r    )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-c                 ,    g | ]}t          |          S r   )r   rh   r   s     r!   r   zmain.<locals>.<listcomp>X  s    CCCc!ffCCCr    z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)c                 D    g | ]}|                     d           d         S )=r   splitr  s     r!   r   zmain.<locals>.<listcomp>l  s&    @@@AQ@@@r    c                 .    g | ]}t          |d           S )r   )r   r  s     r!   r   zmain.<locals>.<listcomp>o  s"    <<<1fQ&&<<<r    c                 D    g | ]}|                     d           d         S )r  r9   r  r  s     r!   r   zmain.<locals>.<listcomp>p  s&    ;;;q1773<<?;;;r    zTest summary is saved to)4r   r   maxr   samplesr   r   r   Managerdictsetr   r   	Exceptionr#   modelr%   r&   r'   r(   r)   r*   r+   r
   r   r   r   r   r   r   r   r   r?   r   sortedrn   r   r   r   r   parentr   r   r   r   strftimerM   csvwriterr  r   writerow)r   managerr   batch_size_setr   r   r   sorted_resultssummary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                   r!   mainr,  "  s   D!aTDL%8!9!9::#q(('+';$%''G<<>>L))N1$$^)<)<)C)C:;;; 
!"	 	M % C C
" LOLM%IL('
 
  	nl+++|\BBBB L..00%^^TTTN7<<TZ(//\,EEuHHCCfT.-A-A&B&BCCCDD LNN##O44		
 	
 L 
lD"	-	-	- (ZDNNN
 . 	( 	(CYYs^^F   @@@@@AAA##G,,,<<<<<FMM;;F;;;<<<''''#	(( ( ( ( ( ( ( ( ( ( ( ( ( ( (, 

$l33333s   3B1L11L58L5__main__)Nr,   N)'r   r  rO   r   r   r   r   r   dataclassesr   r   pathlibr   typingr   numpyrZ   r   rX   bert_test_datar   r   r
   r#   rV   ra   rs   r   r   r   r   r   r   r   r   r   r,  r   __spec__r   r    r!   <module>r4     sN    



      				       ! ! ! ! ! !                         > > > > > > > > ! ! ! ! ! ! ! !           "V V V Vr     ) ) )  .! ! !2! ! !   B9 B9 B9J  a a a0J J J:_ _ _DQ4 Q4 Q4h zHDFFFFF	 r    