
     NgX                     j   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlZ ej         e!          Z"de j#        fd	Z$de j#        fd
Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-e!dk    r e-             dS dS )    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                    	
  j         dvrt          d           fd fd	 j         j         j         j         j         j        d} j         dk    r|                                D ]9\  }}t          j
        |gd|v rt          j        nt          j                  ||<   : j        r)t          j
         j        gt          j                  |d	<    j        r)t          j
         j        gt          j                  |d
<    j        r)t          j
         j        gt          j                  |d<   t&                              d j                    	fd}t-           | j                    | j                  } j        r||d<   |S t&                              d            j         dk    rdnd
 
fd}t-           ||            ||          } j         dk    r||d<   |S |                     j        rt4          j        nt4          j         j                  |d<    j        |d<   d|d<   d|d<    j        r
 j        |d<   |S )N>   orthf-orthf-pt-eagerhf-pt-compilez/Unable to auto-detect inputs for provided modelc                  b    t          j        j                  } t          j        |           } | S N)whisper
load_audio
audio_pathpad_or_trim)audior   s    m/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpegz#get_inputs.<locals>.load_via_ffmpeg$   s*    "4?33#E**    c                  
   t          j        d          5 } t          j        t	          |                                           t          j                  }t          j        |g          }d d d            n# 1 swxY w Y   |S )Nrbdtype)openr   npasarraylistreaduint8array)fr   r   s     r   load_via_numpyz"get_inputs.<locals>.load_via_numpy)   s    $/4(( 	&AJtAFFHH~~RX>>>EHeW%%E	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& s   AA88A<?A<)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   penaltyr    decoder_input_idslogits_processortemperaturezLoad audio: c                 0    | r
             n	             S r    )onnx_e2er   r*   s    r   <lambda>zget_inputs.<locals>.<lambda>D   s!    %X^^%5%5%5GXGX r   audio_streamzFeature extraction: r#   ptc                 T    j                             | gj                  j        S )N)return_tensorssampling_rate)	processorfeature_extractorr=   input_features)r   r   return_types    r   r8   zget_inputs.<locals>.<lambda>P   s1    !A!A	4;M "B " " r   r@   )r!   deviceinputsno_repeat_ngram_sizeTearly_stopping	use_cacheforced_decoder_ids)benchmark_type	Exceptionr+   r,   r-   r.   r/   r0   itemsr#   r(   float32int32has_decoder_input_idsr2   has_logits_processorr3   has_temperaturer4   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicerD   )r   rC   kvload_audio_fn
audio_dataprocessor_fnr@   r   r*   rA   s   `       @@@r   
get_inputsr^       s   "SSSIJJJ    
     oo^ $ 9-"5 F e##LLNN 	X 	XDAq!)q..BJJbhWWWF1II% 	]*,(D4J3KSUS[*\*\*\F&'$ 	[)+43H2IQSQY)Z)Z)ZF%& 	S$&Hd.>-?rz$R$R$RF=! KK0t00111XXXXXMD-!6777t455J !+~ KK&'''-66$$DK    L D,
+++!\*--Ne###1 %((#}?emm%-HZ )  F8 &*%>F!"#FF; >'+'=#$Mr   c                 @   d\  }}d\  }}| j         dv r| j        r| j        n| j        }t          j                    }t	          j        || j        rt          j        nt          j	        d          
                    | j                  }t          j                    }| j         dk    rt          j        |          }n| j         dv rt          j                    }| j        |_        |                    t%                                 | j        r=d|_        d|_        | j        r(t          j        d           t          j        d           nt3          d	| j                    | j         d
k    rt5          | j                  t8          u r| j        d         n| j        }t5          | j                  t8          u r| j        d         nd }t          j                    }t;          j        | j        |||d          }t          j                    }| j         dk    rjt>                               d| j!                    t          j                    }t          j"        | j!        || j        g          }t          j                    }t>                               d||z
   d           |S )N)NN   r   r   T)torch_dtyperF   r   >   r   r      r   Cannot recognize r   )providerprovider_optionssession_optionsuse_io_bindingr   zLoading model from )	providerszLoaded model in  s)#rH   hf_pt_model_path
model_nametimer   from_pretrainedrU   rV   rW   rK   rT   rX   compiler   SessionOptionsr   enable_profilingregister_custom_ops_libraryr   verboselog_verbosity_levellog_severity_leveltuneset_default_logger_severityset_default_logger_verbosityrI   typeexecution_providertupler   hf_ort_dir_pathrP   rQ   ort_model_pathInferenceSession)r   modelsess_options
start_timeend_timesourcerd   re   s           r   	get_modelr   h   s   $E<%J >>>*.*?T&&T_Y[[
)9)-IEM
 
 
 "T
 
 	 	
 9;;/11M%((E		 1	1	1)++(,%001A1C1CDDD< 	4/0L,./L+y 4/2220333 AD,?AABBBh&&15d6M1N1NRW1W1W4*1--]a]t9=d>U9V9VZ_9_9_42155eiY[[
(8 -(
 
 
 9;;e##?$*=??@@@Y[[
$./
 
 

 9;;
KK<8j#8<<<===Lr   c                 .   t          |          t          u r|d         n|}t          |          t          u r|d         n|}t          j        | j                  }| j        dk    rt          | j                  n t          | j        t          j
        d          }| j        r% ||          }t                              |           |D ]} ||           | j        dk    rt          j                            |           t!          j                    }	| j        dk    rt          | j                  n t          | j        t          j
        d          }
|
D ]} ||           | j        dk    rt          j                            |           t!          j                    }| j        dk    rt                              d           d}||	z
  | j        z  }||z  }t                              d	| d
           t                              d| d           d S )Nr   rb   r   zWarm up)filedesccpu	Benchmark z	Latency: ri   zThroughput: z qps)rx   rz   rV   rB   rX   rH   rangewarmup_runsr
   sysstdoutrr   rP   rQ   cudasynchronizerl   num_runs)r   fnrC   warmup_inputsbenchmark_inputstorch_devicewarmup_rangeoutputs_r   bench_ranger   
batch_sizelatency
throughputs                  r   rR   rR      s   !%f!6!6F1IIFM$(LLE$9$9vayyv< 233L
 %'' 	dD$3:IFFF  | "]##G  
= {e
|,,,J %'' 	dmDM
EEE 
   
{e
|,,,y{{H e##BJ*$5Gg%J
KK'G'''(((
KK/z///000
Fr   c                 ,   | j                                          d| j         d| j         d|j                            dd           d| dt          j                                        d}d }| j         dv rt          t          j
        t          j        gdd          5 }t          d          5   ||           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |                    d	                              | j        | j        
          }t"          j                            | j        | d          }t+          |d          5 }|                    |           d d d            n# 1 swxY w Y   n ||           | d}|S )N-r   z%Y-%m-%d_%H:%M:%Sr`   T)
activitiesrecord_shapesprofile_memorymodel_inference   )group_by_stack_n)sort_by	row_limitz.logw.json)rH   lower	precisionrB   __name__replacedatetimenowr   r   CPUCUDAr	   key_averagestablept_filter_bypt_num_rowsospathjoin
log_folderr"   write)	r   r   rC   inputs_typeprefixfilenameprof	prof_datar)   s	            r   
profile_fnr      s    #))++  g  gdn  g  gt{  g  gR[M`M`adfiMjMj  g  gmx  g  g  |D  |M  |Q  |Q  |S  |S  g  g  gFH>>>(,.>.CDTXim
 
 
 	 !233  6


              	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 %%q%99??HYeieu?vv	7<<FAA(C   	AGGI	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 	6


 ###OsH   C/C;CC	CC	CC"%C"E88E<?E<c                    t          j                    }t          j        |          }|                    d                       t
                              d|                    d            d           t          j                     t          j
                                         t          | j        dk    fd| j                   t          j                                         d S )Ng?)intervalzCPU usage: %r   c                                  S r   r6   )r   rC   s   r   r8   zmeasure_fn.<locals>.<lambda>  s    rr&zz r   )is_gpufuncmonitor_type)r   getpidpsutilProcesscpu_percentrP   rQ   gccollectrV   r   empty_cacher   rB   r   r   r   flush)r   r   rC   pidprocesss    ``  r   
measure_fnr      s    
)++CnS!!G%%%BvJJJ
KKCg1141@@CCCDDD JLLL	J4;%/7I7I7I7I7IX\Xijjjj Jr   c                    
 fd
 
fd}|} j         dk    r ||            j        rt           ||d          } j         dk    r|d t          d                    }j        j                                        }|dz   }t          j        	                    |          rXt                              d| d	|            t          j        |t          j                             j        |                     j        j                                        }|d
z   }t          j        	                    |          rXt                              d| d	|            t          j        |t          j                             j        |                     j        j                                        }|dz   }t          j        	                    |          rXt                              d| d	|            t          j        |t          j                             j        |                     d S t                              d           t%           ||            ||          \  }}	t                              dt          |d                    d           t                              d|	d                     t'           ||           d S )Nc                 "     j         di | }|S )Nr6   )generate)rC   predicted_idsr~   s     r   get_pred_idsz&run_hf_inference.<locals>.get_pred_ids  s     &0000r   c                      |           }g }t          j                  D ]7}|                    j                            |d          d                    8||fS )NTskip_special_tokensr   )r   r.   appendr>   batch_decode)rC   r   transcriptionr   r   r   s       r   gen_and_decz%run_hf_inference.<locals>.gen_and_dec  sq    $V,,t011 	j 	jA  !<!<]`d!<!e!efg!hiiiim++r   r   zgen-and-decr   r   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )rH   r   r   lenencodersessionend_profilingr   r   isfilerP   warningrenamer   r   decoderdecoder_with_pastrQ   rR   r   )r   rC   r~   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   s   ` `       @r   run_hf_inferencer     s       
, , , , , ,  Ko--F|  {FMJJ(**$_G}_5J-/==??K$6Kw~~k** SI;IIKIIJJJ	+rw||DO['Q'QRRR-/==??K$6Kw~~k** SI;IIKIIJJJ	+rw||DO['Q'QRRR19GGIIK$'@@Kw~~k** SI;IIKIIJJJ	+rw||DO['Q'QRRR KK)***D+v&&&#.;v#6#6 M=
KKI3}Q/?+@+@IIIJJJ
KK4-"244555t[&)))))r   c                     d fd	}fd}fd} fd} j         dk    r|n|} ||          } j        rt           ||d          }	                                }
t                              d|
 d	|	            t          j        |
t          j        	                     j
        |	                     d S t                              d
           |} j        r ||d          }||f}t           ||            ||          } j         dk    r|                                }|d         } j        r*t                              d|d         d                     nz ||d         d                   }t                              dt!          |           d            j                            |d         d          d         }t'          |            t)           ||           d S )NFc                    t          t          d                                                     }t          |                                           }||z
  }t	          |          r,t
                              d|            t          d          |rj        r| d         | d<   ||z
  }t	          |          r&|D ]#}t
          	                    d| d           | |= $j
        dk    r                                }|                                 D ]\  }}	|                    ||	                                           D ])}
|                    |
j        j
        j        	           *|S | S )
Nc                     | j         S r   namemodel_inputs    r   r8   z?run_ort_inference.<locals>.prepare_ort_inputs.<locals>.<lambda>N  s	    ;3C r   z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r+   r,   zRemoving unnecessary input 'z' from user provided inputsr   )device_type	device_id)setmapr^   keysr   rP   errorrI   ru   rQ   rB   
io_bindingrJ   bind_cpu_inputget_outputsbind_outputr   r   )rC   warmupmodel_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputr   rY   rZ   outputr   r~   s              r   prepare_ort_inputsz-run_ort_inference.<locals>.prepare_ort_inputsL  s   3CCUEUEUEWEWXXYY&++--((%3~ 	eLLTNTTUUUcddd 	8di 	8#),#7F<  )<7!"" 	.%7 . .!i;Liiijjj,-- ;%))++J 0 01))!Q////++-- g g&&v{W[We&ffffr   c                 2                         |            | S r   )run_with_iobinding)r   r~   s    r   with_io_bindingz*run_ort_inference.<locals>.with_io_bindingj  s      ,,,r   c                 4                         d |           }|S r   )run)rC   r   r~   s     r   without_io_bindingz-run_ort_inference.<locals>.without_io_bindingo  s    ))D&))r   c                     j         | v r6t          j        | j         k              d         d         }| d |dz            S | S )Nr   rb   )eos_token_idr#   where)r  	first_endr   s     r   handle_outputz(run_ort_inference.<locals>.handle_outputt  sJ    &&4+<!<==a@CI/IM/**r   r   e2er   r   z
Evaluating ONNX Runtime...T)r  r   r   r   r   r   )F)rB   r   r   r   rP   r   r   r   r   r   r   rQ   ru   rR   copy_outputs_to_cpurS   r   r>   r   printr   )r   rC   r~   r  r  r  r  r   
ort_inputsr   r   ort_evaluate_inputsort_warmup_inputsort_outputsactual_outputr   s   ` `             r   run_ort_inferencer  K  se         <    
    
     &*[E%9%9//?QK##F++J|  {JFF ))++A;AAKAABBB
	+rw||DO[IIJJJ KK.///$y >..vdCCC0*=D+2333+j))K{e!5577a.K 	"9k!nQ&799:::: &k!nQ&788Js=/A/AJJJKKK33KNX\3]]^_` !!!t[*-----r   c                     | j         dv rt          | ||           d S | j         dk    rt          | ||           d S t          d| j                    )N>   r   r   r   r   rc   )rH   r   r  rI   )r   rC   r~   s      r   run_inferencer    si    HHHvu-----			%	%$.....AD,?AABBBr   c            	      	   t          j                    } |                     ddt          dg d           |                     ddt          dd	           |                     d
dt          ddg dd           |                     dt          dd           |                     dt          dd           |                     dt          dd           |                     ddt          dd	           |                     ddt          t          j                                        rdndg d            |                     d!d"t          d#$           |                     d%d&t          d'$           |                     d(d)t          d*$           |                     d+t          d,$           |                     d-t          d.d/           |                     d0t          d1$           |                     d2t          d#$           |                     d3t          d4$           |                     d5t          d4$           |                     d6t          d7$           |                     d8t          d7$           |                     d9t          d:$           |                     d;t          d<d=           |                     d>t          d4d?           |                     d@t          d7dA           |                     dBdCdDE           |                     dFt          dGdH           |                     dIt          dJdK           |                     dLdCdDE           |                     dMt          t          j
                            dN          dO           |                     dPdCdDdQR           |                                 }t          j                            |j                   t	          j        |j                   |j        |_        dS|j        v rt|j                                         dT|_        |j        dUk    r|j        dV|j        if|_        n2|j        dWk    r'|j        |j        d4|j        rd4nd#dXf|_        d|_        |j        dYk    r|j        s
J dZ            |j        dSk    r|j        s
J d[            t5          j        |j                  |_        |S )\Nz-btz--benchmark-typeT)r   r   r   r   )rx   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))rx   r   helpz-pz--precisionfp32)int8fp16r#  zePrecision for model. For ONNX models, the model's precision should be set before running this script.)rx   r   defaultr!  r"  z--hf-pt-model-pathr   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rx   r&  r"  z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--devicer   r   )r   r   rocm)rx   r&  r!  z-idz--device-idr   )rx   r&  z-wz--warmup-runsr   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsrb   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzLWhether to use timestamps logits processor or not (0 for false, 1 for true).z--temperaturez!Temperature value for generation.z	--profileF
store_true)r&  actionz--pt-filter-byself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r&  r,  r"  r   ExecutionProviderCUDAExecutionProviderr   ROCMExecutionProvider)r   tunable_op_enabletunable_op_tuning_enabler   z,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrV   r   is_availableintfloatr   r   r   
parse_argsr#   randomseedmanual_seedrB   r   rH   upperry   r   ru   r{   r|   astliteral_evalr2   )parserr   s     r   r;  r;    s   $&&F
AAA     J     (((t     ]	     p	     !	     n3Lsttt
*1133>'''     }3BBB
oCCCC
lbAAA
sA666 )UIjkkk S#>>>
S!<<<
C;;;
0sAFFF
*DDD
.UCHHH
0sAFFF  |	     [	     0	     U<HHH
s,AHl     c4Fvwww
U<HHH
S"',,s:K:KRmnnn
U	     D INN49	di   D###%)[%6%6%8%8"K"K"K"&==='+'>dn@]&^D##$(???'!%)*59Y0EA 'D# !DK h&&#SS%SSSSe##"QQ$QQQQ !-d.DEEDKr   c                     t                      } t          | j                   t                              | j                   dt          j        j        _	        t          j        | j                  }t          j        | j                  }| j        dk    r
d| j         n| j        }| j        dk    }t#          | d|           t#          | d|           t#          | d|           t#          | dd	           t#          | d
|j                   t                              d| j                    t)          |           }| j        dk    rt-          t/          d |                                                    }d|v | _        t#          | dd|v            t#          | dd|v            t#          | dd|v            | j        g k    r|j        g| _        t1          |           }t7          | ||           d S )NTr   zcuda:r%  r>   rX   rU   rS   Fr  zForced decoder prompt ids: r   c                     | j         S r   r   r   s    r   r8   zmain.<locals>.<lambda>T  s	    {7G r   r9   rM   r2   rN   r3   rO   r4   )r;  r   rr   rP   rQ   __dict__rV   backendscudnn	benchmarkr   rm   rk   r   rB   r   r   setattrr  r2   r   rH   r   r   r^   rS   decoder_start_token_idr  )r   configr>   rX   rU   r~   ort_model_inputsrC   s           r   mainrM  =  s   <<D
KK%)EN"*4?;;F 0AAI04u0D0D,DN,,,$+M~'HD+y)))D/=111D*h'''D$e,,,D.&"5666
KKFd.DFFGGG dOOEe##s#G#GIYIYI[I[\\]] .2B B-/BFV/VWWW,.@DT.TUUU':J)JKKK!R''&,&C%DD"F$&&&&&r   __main__).r4  r@  r   r   loggingr   r   rl   numpyr#   r   rV   r   benchmark_helperr   r   onnxruntime_extensionsr   optimum.onnxruntimer   torch.profilerr   r   r	   tqdmr
   transformersr   r   r   onnxruntimer   	getLoggerr   rP   	Namespacer^   r   rR   r   r   r   r  r  r;  rM  r6   r   r   <module>rZ     s    



  				  				 



         9 9 9 9 9 9 9 9 3 3 3 3 3 3 8 8 8 8 8 8 E E E E E E E E E E       S S S S S S S S S S    		8	$	$EX' E E E EP@H& @ @ @ @F. . .b  :  $=* =* =*@U. U. U.pC C CN N Nb!' !' !'H zDFFFFF r   