
     Ngf^                        d dl Z d dlZd dlZd dlZd dlZd dlmZ 	 g dZddZd Z	d Z
ddZdd	Zd
 Zd Zd Zd Z	 	 	 ddZd Zd Zd Zd Zedk    rs e            Z ede           d dlmZ  eej                   ej        sej        s
J d             ee          Znej        Z eee          ZeD ]Z  ee            dS dS )    N)TensorProto)ScanLoopIfc                 .   t          j                    }|                    dddt          d           |                    dddt          d           |                    d	d
dt          dd           |                    dddt          dd           |                    ddt          dd           |                    ddt          dd           |                    ddt          dd           |                    ddt
          dd           |                    ddt          dd           |                    ddt          d d            |                    d!dt          d d"           |                    d#dt          d d$           |                    d%dd&g d'd()           |                    d*d+dd,d-.           |                    d/           |                    d0dt          d1d2           |                    d3dd,d4.           |                    d5           |                    d6dd,d7.           |                    d8           |                    d9d:dd,;           |                    d<           |                    |           S )=Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz-mz--modelzIonnx model path to run profiling. Required when --input is not specified.z-bz--batch_size   zbatch size of input)r   r	   defaultr
   z-sz--sequence_length    zsequence length of inputz--past_sequence_lengthzpast sequence length for gpt2z--global_lengthz&number of global tokens for longformerz	--samplesi  z\number of samples to test. Set it large enough to reduce the variance of performance result.z--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.z--thread_numznumber of threads to usez--input_ids_namez"input name for input IDs, for bertz--segment_ids_namez$input name for segment IDs, for bertz--input_mask_namez'input name for attention mask, for bertz--dummy_inputsr   )bertgpt2
longformerr   zEType of model inputs. The default will create dummy inputs with ones.)r   r   choicesr
   z-gz	--use_gpu
store_truezuse GPU)r   actionr
   )use_gpuz
--providercudazExecution provider to usez--basic_optimizationz_Enable only basic graph optimizations. By default, all optimizations are enabled in OnnxRuntime)basic_optimizationz--kernel_time_onlyz.Only include the kernel time and no fence time)kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrintfloatset_defaults
parse_args)argvparsers     ]/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/profiler.pyparse_argumentsr%      s   $&&F
A     X     "     '      ,     5     k     u     '     1     3     6     999T     kE,U^___
&&&
(     n	     5111
=	     ///
kE,OOO
&&&T"""    c                     ddl m}  || ||| |d          }|D ]}|                    d |          }	|                                }
|
S )Nr   )create_onnxruntime_sessionT)enable_all_optimizationnum_threadsenable_profiling)benchmark_helperr(   runend_profiling)onnx_model_pathr   providerr   
thread_num
all_inputsr(   sessioninputs_profile_files              r$   run_profiler7      s}    ;;;;;;(($6 6  G  & &KKf%%((**Lr&   c                     t          d|  d           t          |           5 }t          j        |          }d d d            n# 1 swxY w Y   t	          |t
                    sJ |S )Nzloading profile output z ...)printopenjsonload
isinstancelist)r6   opened_file	sess_times      r$   load_profile_jsonrA      s    	
6L
6
6
6777	l		 +{Ik**	+ + + + + + + + + + + + + + + i&&&&&s   AAAc                    i }i }i }d}d}| D ]}|d         dk    r|d         dk    rd}|s|d         dk    rd	|v rd
|v r}d|d
         v rs|d         }|d
         d         }	|	t           v r]|	sd| d}	||v r'||xx         |d	         z  cc<   ||xx         dz  cc<   n|d	         ||<   d||<   |	||<   ||d	         z  }|sdgS g }
|
                    d|dz  dd           |
                    d           |
                    d           t          |                                d d          D ]U\  }}||z  }||k     r||         }|t	          |          z  }|
                    |dd|dz  dd|dd|dd| 	           Vi }|                                D ]'\  }}	||         }|	|v r||	xx         |z  cc<   "|||	<   (|
                    d           |
                    d           |
                    d           t          |                                d  d          D ],\  }	}||z  }|
                    |dd|dz  dd|	            -|
S )!  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   FcatSessionnamesession_initializationTKerneldurargsop_name()r   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                     | d         S Nr    xs    r$   <lambda>z&parse_kernel_results.<locals>.<lambda>   s
    1Q4 r&   keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                     | d         S rS   rT   rU   s    r$   rW   z&parse_kernel_results.<locals>.<lambda>  s
    1Q4 r&   )NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r@   	thresholdkernel_name_to_op_namekernel_timekernel_freqtotalsession_inititemkernel_namerK   linesdurationratiocallsavg_timeop_times                   r$   parse_kernel_resultsrt      so     KKEL ! !;)##V8P(P(PL 	;(""u}}4IY]^dYeLeLev,K6l9-G888  -,k,,,k))K(((DK7(((K(((A-((((+/;K(+,K(6=&{3T%[ E +)** E	LLN)C-NNNNOOO	LL	LL=>>>!'(9(9(;(;Y]!^!^!^ j jX5 9K(eEll*hhhhhhehhh(hhh[fhhiiii G 6 < < > > ( (W{+gG('GG	LL3444	LL	LL.///#GMMOOQUVVV I I5 GGGGGGgGGHHHHLr&   Fc                 b   g }i }i }i }d}| D ]@}|d         dk    r0d|v r+d|v r&d|d         v r|d                              dd	                               d
d	                               dd	          }	d|d         v rW|d         d         dk    rd}
n)|d         d         dk    rd}
n|d         d         dk    rd}
|	|vr|
||	<   n||	         |
k    sJ n|r|d         d         }|t          v r|	|v r'||	xx         |d         z  cc<   ||	xx         dz  cc<   n%|d         ||	<   d||	<   |                    |	           ||d         z  }Bg d}d}|D ]v}	||	         }||	         }|t          |          z  }||z  dz  }|                    |	d	          }||z  }|                    |dd|dd|dd|dd|dd|dd|	            w|                    d|dz  dd            |                    d!           |                    d"           t          |                                d# d$%          D ]t\  }	}||z  }||k     r||	         }|t          |          z  }||z  dz  }|                    |	d	          }|                    |dd|dd|dd|dd|dd|	            u|S )&rC   r   rD   NoderI   rJ   rK   rF   _kernel_time _fence_before_fence_afterr0   CPUExecutionProviderCPUCUDAExecutionProviderCUDADmlExecutionProviderDMLr   )z
Nodes in the original order:rQ   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        r]   r[   r\   r^   r`   r_   8sz#
Top expensive nodes with Time% >= rN   rO   rP   rQ   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                     | d         S rS   rT   rU   s    r$   rW   z$parse_node_results.<locals>.<lambda>V  s
    qt r&   TrX   )replacerb   rc   r   getrd   re   )r@   r   rf   node_name_list	node_time	node_freqnode_providerrj   rl   	node_namedevicerK   rn   before_percentagero   rq   rr   
percentager0   rp   s                       r$   parse_node_resultsr     s.    NIIME !! !!;&  Ud]]v~~)W[\bWcJcJcV$$^R88@@RTUU]]^lnpqq  T&\))<
+/EEE"FF&\*-1HHH#FF&\*-1GGG"FM11/5M),,(3v=====! 6l9-G888I%%)$$$U3$$$)$$$)$$$$'+E{	)$'(	)$%%i000T%[ E  E
 # 	
 	
	Y')$eEll*&%/
 $$Y33Z'  D  D  Dz  D  D  D2C  D  D  DH  D  D  D]b  D  D  Diq  D  D  D  yB  D  D	
 	
 	
 	

 
LLL	#LLLLMMM	LL	LLEFFF%ioo&7&7^^UYZZZ 	t 	t	85 9)$eEll*&%/
 $$Y33rrr
rrr8rrrErrrX`rrrgprrssssLr&   c                 H   i }i }d}i }i }i }i }	d}
i }| D ]g}|d         dk    rWd|v rRd|v rMd|d         v rB|d         d         }|t           v r=d|d         vr<d|d	         v r1||	v r|	|xx         |d         z  cc<   n|d         |	|<   |
|d         z  }
|d                             dd
          }||v r||xx         dz  cc<   nd||<   | d| }||v r'||xx         |d         z  cc<   ||xx         dz  cc<   n|d         ||<   d||<   ||v r||xx         |d         z  cc<   n|d         ||<   ||v r'||xx         |d         z  cc<   ||xx         dz  cc<   n|d         ||<   d||<   ||d         z  }id
dg}|                    d           |                    d           t          |                                d d          D ]s\  }}|	                    |d          }||z  }||z   }|||
z   z  }||         }||z  }|                    |dd|dz  dd|dd|dz  dd|dd|dd|dd|            t|d
dgz  }|                    d           |                    d           t          |                                d d          D ]\  }}|                    d          }|d         }|d         }|                    dd
          }||         }||z  }|||         z  }|                    |dd|dz  dd|dd|dd|dd|            |S ) a  Group results by operator name.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool): Only include items for kernel time.
        use_gpu (bool): GPU is used in profiling or not.

    Returns:
        List[str]: lines of string for output.
    r   rD   rv   rI   rJ   rK   r0   fencerF   rx   r   rP   zGrouped by operatorrQ   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                     | d         S rS   rT   rU   s    r$   rW   z$group_node_results.<locals>.<lambda>  s
    QqT r&   TrX   r[   r\   r]   r^   11dr_   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                     | d         S rS   rT   rU   s    r$   rW   z$group_node_results.<locals>.<lambda>  s    RSTURV r&   ExecutionProviderz9.2fr   )rb   r   rc   rd   re   splitr   )r@   r   r   op_kernel_timeop_kernel_recordstotal_kernel_timeprovider_op_kernel_timeprovider_op_kernel_recordsprovider_kernel_timeop_fence_timetotal_fence_timeprovider_counterrl   rK   r0   rY   rn   rh   
fence_timekernel_time_ratio
total_time
time_ratiokernel_callsavg_kernel_timepartsshort_eprq   provider_time_ratios                               r$   group_node_resultsr   d  sf    N !#M +- +-;&  Ud]]v~~)W[\bWcJcJc6l9-G 888f--d6l**-//%g...$u+=....15eg.$U3$F|''
B77H+++ ***a/****-. *))))C---',,,U;,,,*3///14/////3E{',23*3////$X...$u+=....15e$X..((w'''4;6'''!'***a/*****.u+w'-.!'*e,&'E	LL	LLghhh &~';';'='=>>[_ ` ` ` 	
 	
"&&w22
'*;; :-
#47G#GH
(1%4  y  y  ye!3  y  y  yK  y  y  yO`chOh  y  y  yq}  y  y  y  ET  y  y  y  ^h  y  y  y  pw  y  y	
 	
 	
 	
 
b233E	LL	LLTUUU"#:#@#@#B#B`deee 

 

[		#8(##$7<<*3/%-),@,JJ  D  D  D"5"=  D  D  De  D  D  DRa  D  D  Dks  D  D  D  {B  D  D	
 	
 	
 	
 Lr&   c                     t          |                     d                    t          k    r#t          | |                     d                    nd S )Nvalue)r	   
WhichOneofr   getattr)dims    r$   get_dim_from_type_protor     sC    489P9P4Q4QUX4X4X73w//000^bbr&   c                 8    d | j         j        j        D             S )Nc                 ,    g | ]}t          |          S rT   )r   ).0ds     r$   
<listcomp>z-get_shape_from_type_proto.<locals>.<listcomp>  s!    QQQ1#A&&QQQr&   )tensor_typeshaper   )
type_protos    r$   get_shape_from_type_protor     s    QQ
0F0L0PQQQQr&   c                    i |                                  D ]F}t          |j                  }g }t          |          D ]/\  }}t	          |t
                    r|                    |           0t          |          dk    r dS t          |          dk    r|||d         <   t          |          dk    r|||d         <   |j        j        j	        }	|	t          j        t          j        t          j        fv sJ |	t          j        k    rt          j        n'|	t          j        k    rt          j        nt          j        }
t          j        ||
          }||j        <   Hfdt)          |          D             }|S )a  Create dummy inputs for ONNX model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples

    Returns:
        List[Dict]: list of inputs
       Nr   r   dtypec                     g | ]}S rT   rT   r   r5   dummy_inputss     r$   r   z'create_dummy_inputs.<locals>.<listcomp>      7771,777r&   )'get_graph_inputs_excluding_initializersr   r	   	enumerater=   r   rc   lenr   	elem_typer   FLOATINT32INT64numpyfloat32int64int32onesrF   range)
onnx_model
batch_sizesequence_lengthsamplesgraph_inputr   symbol_dimsir   r   	data_typedatar2   r   s                @r$   create_dummy_inputsr     s    L!IIKK . .)+*:;;&& 	& 	&FAs#s## &""1%%% {a44{a$.E+a.!{a$3E+a.!$0:	[.0A;CTUUUUU K--- MM!*k.?!?!?%++U[ 	
 z%y111)-[%&&7777g777Jr&   c                 b    ddl m}m}  || |||          \  }	}
} ||||dd|	|
|d	  	        }|S )a-  Create dummy inputs for BERT model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples
        input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

    Returns:
        List[Dict]: list of inputs
    r   )find_bert_inputsgenerate_test_data{   F)
test_casesseedr   	input_idssegment_ids
input_maskrandom_mask_length)bert_test_datar   r   )r   r   r   r   input_ids_namesegment_ids_nameinput_mask_namer   r   r   r   r   r2   s                r$   create_bert_inputsr     sv    . DCCCCCCC)9)9*nVfhw)x)x&I{J## 
 
 
J r&   c                 z   |||||z   d}i |                                  D ]}t          |j                  }t          |          D ];\  }}	t	          |	t
                    r!|	|vrt          d|	           ||	         ||<   <|j        j        j        }
|
t          j
        t          j        t          j        fv sJ |
t          j
        k    rt          j        n'|
t          j        k    rt          j        nt          j        }t          j        ||          }||j        <   fdt'          |          D             }|S )a  Create dummy inputs for GPT-2 model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        past_sequence_length (int): past sequence length
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )r   seq_lenpast_seq_lentotal_seq_lensymbol is not supported: r   c                     g | ]}S rT   rT   r   s     r$   r   z&create_gpt2_inputs.<locals>.<listcomp>O  r   r&   )r   r   r	   r   r=   r   RuntimeErrorr   r   r   r   r   r   r   r   r   r   r   rF   r   )r   r   r   past_sequence_lengthr   symbolsr   r   r   r   r   r   r   r2   r   s                 @r$   create_gpt2_inputsr   #  sb   $ !",(+??	 G L!IIKK . .)+*:;;&& 	, 	,FAs#s## ,g%%&'H3'H'HIII&s|E!H$0:	[.0A;CTUUUUU K--- MM!*k.?!?!?%++U[ 	
 z%y111)-[%&&7777g777Jr&   c                    ||d}i |                                  D ])}t          |j                  }t          |          D ];\  }}	t	          |	t
                    r!|	|vrt          d|	           ||	         ||<   <|j        j        j        }
|
t          j
        t          j        t          j        fv sJ |
t          j
        k    rt          j        n'|
t          j        k    rt          j        nt          j        }d|j        v r"t          j        ||          }d|ddd|f<   nt          j        ||          }||j        <   +fdt)          |          D             }|S )a  Create dummy inputs for Longformer model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        global_length (int): number of global tokens
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_longformer_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )r   r   r   globalr   r   Nc                     g | ]}S rT   rT   r   s     r$   r   z,create_longformer_inputs.<locals>.<listcomp>~  r   r&   )r   r   r	   r   r=   r   r   r   r   r   r   r   r   r   r   r   r   rF   zerosr   r   )r   r   r   global_lengthr   r   r   r   r   r   r   r   r   r2   r   s                 @r$   create_longformer_inputsr   S  s     (OLLGL!IIKK . .)+*:;;&& 	, 	,FAs#s## ,g%%&'H3'H'HIII&s|E!H$0:	[.0A;CTUUUUU K--- MM!*k.?!?!?%++U[ 	 {''';uI666D&'DN]N"##:e9555D)-[%&&7777g777Jr&   c                     t          |           }t          ||j                  }|t          ||j        |j                  z  }|t          ||j        |j                  z  }|S N)rA   rt   rf   r   r   r   r   )r6   rJ   profile_recordsrn   s       r$   process_resultsr     s]    '55O $.AAE	1FWWWE	1FUUUELr&   c           	         | j         dk    r| j         nt          j        d          }dt          j        vrt          |          t          j        d<   ddlm} ddlm	}  | || j
                            }d }| j        dk    r4t          || j        | j        | j        | j        | j        | j                  }n| j        dk    r(t'          || j        | j        | j        | j                  }nT| j        d	k    r(t+          || j        | j        | j        | j                  }n!t/          || j        | j        | j                  }t1          | j
        | j        | j        | j        | j         |          }|S )
Nr   F)logicalOMP_NUM_THREADS)r<   )	OnnxModelr   r   r   )r1   psutil	cpu_countosenvironr   onnxr<   r   r   modelr   r   r   r   r   r   r   r   r   r   r   r   r   r7   r   r0   r   )rJ   r*   r<   r   r   r2   r6   s          r$   r-   r-     s   %)_q%8%8$//f>NW\>]>]>]K 
**(+K(8(8
$%$$$$$$44
++,,JJF""'O L! 
 


 
	f	$	$'O %L
 


 
	l	*	*-O L
 


 )T_dFZ\`\hii

 L r&   __main__	Arguments)setup_loggerzMrequires either --model to run profiling or --input to read profiling resultsr   )r   )Fr   )NNN)!r   r;   r   r   r   r  r   rb   r%   r7   rA   rt   r   r   r   r   r   r   r   r   r   r-   __name__	argumentsr9   r,   r  r   inputr  r6   resultslinerT   r&   r$   <module>r     s/     				         "8!7!7 I# I# I# I#X  &  M M M M`T T T Tna a aHc c cR R R' ' '^ & & & &R- - -`, , ,^	 	 	3 3 3l z!!I	E+y!!!------L"###? 'oo oooos9~~ olI66G  d#   r&   