
    קgM                        d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ d dlZd dlmZ g dZ G d d	e          Zd
 Zd Zd Zd Z G d d          Z G d d          Z edg d          Z G d de          Z G d de          Z G d de          Z G d d          Z d Z!dZ"dZ#d Z$d'd!Z%	 	 	 	 	 	 	 	 	 d(d&Z&dS ))    N)defaultdict
namedtuple)
attrgetter)AnyDictListOptionalTuple)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	e
d             Z	 	 	 	 	 	 	 ddZd Zd ZdedefdZddZd Z xZS )r   z'A list of Events (for pretty printing).c                     |                     dd           }|                     dd          }|                     dd          } t                      j        |i | || _        || _        d| _        || _        d S )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         X/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/autograd/profiler_util.pyr   zEventList.__init__   s    ZZd33
$4e<<ZZe44
$)&)))%- %    c                     |                                   |                                  |                                  d| _        d S )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r!   s    r%   _build_treezEventList._build_tree'   sF    ##%%%   &&(((r&   c                 *    |                                  S N)tabler+   s    r%   __str__zEventList.__str__-   s    zz||r&   c                    	 t                      t          t          |                     D ]}| |         j        | |         j        j        | |         j        k    rt          | |         j        j                  dk    rq| |         j        | |         j        _        | |         j        | |         j        _        | |         j        D ]}| |         j        |_                            |           t                    dk    rd S fdt          |           D             }| 	                                 | 
                    |           J)NT   r   c                 "    g | ]\  }}|v	|S  r4   ).0indev	to_deletes      r%   
<listcomp>z/EventList._remove_dup_nodes.<locals>.<listcomp>@   s'    RRRwsBS	=Q=Q=Q=Q=Qr&   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r!   idxchnew_evtsr8   s       @r%   r)   zEventList._remove_dup_nodes0   s1   	"ISYY'' 
' 
'I(4S	,1T#Y^CCDI0=>>!CC8<S	8NDI(53793DDI(0"3i4 = =(,S	(<MM#&&&9~~""RRRR)D//RRRHJJLLLKK!!!#	"r&   c                 n   d | D             }t          |t          d                    }t          j        |d           }|D ]\  }}t          |d           }g }d}|D ]}	t	          |          dk    r|d         }
|	j        j        |
j        j        k    s|	j        j        |
j        j        k    r|                                 nD|
	                    |	           |	j
        J d	|	j                     |	                    |
           nt	          |          dk    |                    |	           ӌdS )
a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        c                 J    g | ] }|j         s|j        t          j        k    |!S r4   )is_asyncdevice_typer   CPUr5   evts     r%   r9   z4EventList._populate_cpu_children.<locals>.<listcomp>T   s=     
 
 
<
 %(Oz~$E$E $E$E$Er&   thread)keyc                     | j         | j        fS r.   )rO   node_idevents    r%   <lambda>z2EventList._populate_cpu_children.<locals>.<lambda>a   s    u|U]&C r&   c                 4    | j         j        | j         j         gS r.   )
time_rangestartendrS   s    r%   rU   z2EventList._populate_cpu_children.<locals>.<lambda>s   s    5#3#9E<L<P;P"Q r&   r   Nz(There is already a CPU parent event for )sortedr   	itertoolsgroupbyr<   rW   rX   rY   r   append_cpu_childr=   rP   set_cpu_parentappend)r!   sync_eventseventsthreads	thread_idthread_eventsthread_events_current_eventscur_endrT   parents              r%   r(   z EventList._populate_cpu_childrenD   s    
 

 
 

 8$$
 
 
 #CC
 
 
  )0 	- 	-$I}#QQ  N 35NG' - -.))A--+B/F(.&2C2GGG +/&2C2GGG '**,,,,//666!,44QeiQQ 544,,V444 .))A--  %%e,,,,#-	- 	-r&   c                     fdi }| D ]0} |          #|j         |j        |j        f}||vr
|j         ||<   1| D ]?} |          }|0|j        J |j        |j        f}||v r||         |_         8g |_         @d S )Nc                 F    | d S | j         dk    r| S  | j                  S Nr2   )scoper=   )rN   	bw_parents    r%   rn   z6EventList._set_backward_stacktraces.<locals>.bw_parent   s/    {ta
 y000r&   )stacksequence_nrrO   
fwd_thread)r!   
fwd_stacksrN   tprn   s        @r%   r*   z#EventList._set_backward_stacktraces   s    	1 	1 	1 	1 	1 
 	. 	.Cy~~%#)*?_cj1J&&$'IJqM 	# 	#C	#A}|///]AL1
?? *1CII "CI	# 	#r&   c                 4    t          d | D                       S )Nc              3   $   K   | ]}|j         V  d S r.   )self_cpu_time_totalr5   rT   s     r%   	<genexpr>z0EventList.self_cpu_time_total.<locals>.<genexpr>   s%      ??5,??????r&   )sumr+   s    r%   rw   zEventList.self_cpu_time_total   s    ??$??????r&   Nd   K   7   P   Fc                 H    t          | ||||||| j        | j        |
  
        S )a(  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
                ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
                ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
                ``self_xpu_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda/xpu ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r    )r!   r   r   r   r   r   r   r   s           r%   r/   zEventList.table   s?    : !5"7$;/'"7
 
 
 	
r&   c                    ddl }| j        sdn| j        }t          |d          5 }g }d}|                    d           | D ]}|j        
|                    d                    |j        |j        j        |j                                        |j	        s|j
        nd|j         d|j
         d	                     |j        D ]@}|                    d
|j         d|j        j         d|j
         d| d| d           |dz  }At          |           dk    rD|                    |                                dz
  |j                   |                                 |                    d           ddd           dS # 1 swxY w Y   dS )zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r2      ])osr   openwrite
trace_nameformatrW   rX   
elapsed_us	is_remoterO   rR   r@   r<   seektellSEEK_SETtruncate)	r!   pathr   device_namefchrome_eventsnext_idrN   ks	            r%   export_chrome_tracezEventList.export_chrome_trace   s     				$($4Jff$:J$__ ,	MG GGCLLL !! !!>)' (.v,1133"}R

Q#+QQ3:QQQ( (     ! !A GG(cn ( (!$!5( ( #&*( (
 ")( ( +6( ( (	 	 	 qLGG! 4yy1}}qvvxx!|R[111

GGCLLLY,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	 ,	s   EE==FFc                 
    g dS )N)rw   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_totalr4   r+   s    r%   supported_export_stacks_metricsz)EventList.supported_export_stacks_metrics  s    
 
 
 	
r&   r   metricc           	         ||                                  vr1t          dt          |                                            z             t                              dd          }t	          |d          5 }| D ]}|j        rt          |j                  dk    rt          ||                    dd                              dd                              d	d                    }t          |          dk    rxd
}t          |j                  D ]}||                    |          z  }|dz  } |d d         dz   t          t          |                    z   }|                    |dz              	 d d d            d S # 1 swxY w Y   d S )Nzmetric should be one of: z ;	
____r   r   r   devicexpuprivateuse1 ;rZ    
)r   
ValueErrorstr	maketransr   ro   r<   getattrreplaceintreversed	translater   )	r!   r   r   translate_tabler   rN   metric_value	stack_strentrys	            r%   export_stackszEventList.export_stacks  s   ==????+d::<<==>   --&99$__ 	2 2 29 2SY!!3!3#*vx88 11 99	$ $L <((1,,$&	%-ci%8%8 - -E%)I)III%,II$-crcNS$83s<?P?P;Q;Q$Q		D 01112	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2s   3C<E==FFr   c                 r   | j         sJ t          t                    }dt          t          df         fd}| D ](}| ||||                                       |           )t          |                                | j        | j	        | j
                  }|D ]}|j        d|         |_        |sd|_         |S )aH  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                 d   t          | j                  t          | j                  t          | j                  t          | j                  t          | j                  g}|r'|                    t          | j                             |dk    r|| j        d |         z  }t          |          S Nr   )
r   rP   rR   rK   	is_legacyis_user_annotationr`   input_shapesro   tuple)rT   group_by_input_shapesgroup_by_stack_nrP   s       r%   get_keyz'EventList.key_averages.<locals>.get_key=  s    EIEM""E%&&EO$$E,--C % 4

3u122333!##u{#4$4#455::r&   r   r   r   Nr   )r   r   r   r
   r   rA   r   valuesr   r   r    ro   r   )r!   r   r   statsr   rN   avg_lists          r%   key_averageszEventList.key_averages+  s     9DEU9V9V	uSRUX 	 	 	 	  	R 	RC''#46FGGHLLSQQQQLLNN'/'	
 
 
  	& 	&C	"3#3"34CI( &#% r&   c                 R    t                      }| D ]}||z  }d|_        d|_        |S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rP   )r!   
total_statrN   s      r%   total_averagezEventList.total_averageZ  s>     &''
 	" 	"C#J!JNN 
r&   )Nr{   r|   r}   r~   NF)Fr   )__name__
__module____qualname____doc__r   r,   r0   r)   r(   r*   propertyrw   r/   r   r   r   r   r   r   __classcell__)r$   s   @r%   r   r      s<       11& & & & &       " " "(D- D- D-L# # #4 @ @ X@
   "#(
 (
 (
 (
T7 7 7r
 
 
2# 2s 2 2 2 20- - - -^      r&   r   c                 R    d}d}| |k    r	| |z  ddS | |k    r	| |z  ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr4   )time_usUS_IN_SECONDUS_IN_MSs      r%   _format_timer   h  sb    "LH,L(/////(H$,,,,,r&   c                 P    |dk    r| dk    sJ d|              dS | dz  |z  ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r4   )r   total_time_uss     r%   _format_time_sharer   s  sI    !|||GgGG|||uo-44444r&   c                     d}d|z  }d|z  }t          |           |k    r| dz  |z  ddS t          |           |k    r| dz  |z  ddS t          |           |k    r| dz  |z  ddS t          |           dz   S )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r%   _format_memoryr   {  s    	B	B	B
6{{b3,#,,,,,	V		3,#,,,,,	V		3,#,,,,,6{{T!!r&   c                 (     t           fd          S )Nc                 >    t          t          |                     S r.   )r   r   )r!   r>   s    r%   rU   z!_attr_formatter.<locals>.<lambda>  s    gdD.A.A!B!B r&   )r   r>   s   `r%   _attr_formatterr     s    BBBBCCCr&   c                       e Zd ZdZ ed          Z ed          Z ed          Z ed          Z ed          Z	 ed          Z
ed             Zed	             Ze ed
e          d                         ZdS )r   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_timedevice_timecpu_time_totaldevice_time_totalrw   self_device_time_totalc                 @    | j         dk    rdnd| j        z  | j         z  S Nr   g        r   )countr   r+   s    r%   r   zFormattedTimesMixin.cpu_time  s%    jAooss31D+Dtz+QQr&   c                 @    | j         dk    rdnd| j        z  | j         z  S r   )r   r   r+   s    r%   r   zFormattedTimesMixin.device_time  s%    jAooss31G+G$*+TTr&   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     | j         S r.   )r   r+   s    r%   	cuda_timezFormattedTimesMixin.cuda_time  s     r&   N)r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r   r   r   FutureWarningr   r4   r&   r%   r   r     s         
 #?:..L%om44O()9::+O,?@@-o.CDD!01I!J!JR R XR U U XU ZF     	  X
     r&   r   c                       e Zd Zd Zd ZdS )r   c                 "    || _         || _        d S r.   )rX   rY   )r!   rX   rY   s      r%   r   zInterval.__init__  s    
r&   c                      | j         | j        z
  S )z4
        Returns the length of the interval
        )rY   rX   r+   s    r%   r   zInterval.elapsed_us  s     x$*$$r&   N)r   r   r   r   r   r4   r&   r%   r   r     s2          % % % % %r&   r   r   )r>   r   durationc                      e Zd ZdZdddddddddddej        ddddddddfdZd Zd Zd	 Z	e
d
             Ze
d             Ze
 ede          d                         Ze
d             Ze
d             Ze
d             Ze
 ede          d                         Ze
d             Ze
 ede          d                         Ze
d             Zd ZdS )r   z.Profiling information about a single function.Nr   FrZ   c                    || _         || _        || _        || _        t	          ||          | _        || _        || _        g | _        d| _	        g | _
        d | _        || _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        ||n|| _        || _        || _        || _        d| _        d| _        d| _        d S )Nr2   rZ   ) idrR   r>   r   r   rW   rO   rq   r@   r   r?   r=   r   concrete_inputskwinputsro   rm   r   cpu_memory_usagedevice_memory_usagerJ   r   rp   rK   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r!   r  r>   rO   start_usend_usrq   r   ro   rm   r   r  r  rJ   r   rp   rR   rK   r  r  r   r  r   r  r  r   s                             r%   r   zFunctionEvent.__init__  s	   8 #	)$,Xv$>$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0FF6H 	  )$)
2D "!#$&!!!r&   c                     | j         t          j        k    sJ | j                            t          |||                     d S r.   )rK   r   rL   r@   r`   r   )r!   r>   r   r	  s       r%   append_kernelzFunctionEvent.append_kernel  s@    :>1111F4::;;;;;r&   c                     | j         t          j        k    sJ t          |t                    sJ |j         t          j        k    sJ | j                            |           dS )zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rK   r   rL   
isinstancer   r?   r`   )r!   childs     r%   r^   zFunctionEvent.append_cpu_child   sc     :>1111%///// JN2222  '''''r&   c                     | j         t          j        k    sJ t          |t                    sJ |j         t          j        k    sJ || _        dS )a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rK   r   rL   r  r   r=   )r!   ri   s     r%   r_   zFunctionEvent.set_cpu_parent  sQ     :>1111&-00000!Z^3333 r&   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r.   )r  r5   r  s     r%   ry   z6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>  s6       +
 +
',E"+
 +
 +
 +
 +
 +
r&   )rJ   rK   r   rL   r  rz   r?   r+   s    r%   self_cpu_memory_usagez#FunctionEvent.self_cpu_memory_usage  s[    = 	D,
>>1$s +
 +
040A+
 +
 +
 (
 (
 
 	
r&   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r.   )r  r!  s     r%   ry   z9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>%  s6       .
 .
*/E%.
 .
 .
 .
 .
 .
r&   )rJ   rK   r   rL   r  rz   r?   r+   s    r%   self_device_memory_usagez&FunctionEvent.self_device_memory_usage!  s[    = 	D,
>>1'# .
 .
373D.
 .
 .
 +
 +
 
 	
r&   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r   c                     | j         S r.   r%  r+   s    r%   self_cuda_memory_usagez$FunctionEvent.self_cuda_memory_usage)  s     ,,r&   c                 b    | j         t          j        k    r| j                                        S dS r   )rK   r   rL   rW   r   r+   s    r%   r   zFunctionEvent.cpu_time_total1  s+    z~--?--///1r&   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r.   )r   r!  s     r%   ry   z4FunctionEvent.self_cpu_time_total.<locals>.<genexpr><  s6       )
 )
%*E )
 )
 )
 )
 )
 )
r&   )rJ   rK   r   rL   r   rz   r?   r+   s    r%   rw   z!FunctionEvent.self_cpu_time_total8  s[    = 	D,
>>1"S )
 )
.2.?)
 )
 )
 &
 &
 
 	
r&   c                    | j         s| j        sdS | j        t          j        k    rb| j        s=t          d | j        D                       t          d | j        D                       z   S t          d | j        D                       S | j        t          j	        t          j
        t          j        fv sJ | j                                        S )Nr   c              3   $   K   | ]}|j         V  d S r.   r	  r5   kinfos     r%   ry   z2FunctionEvent.device_time_total.<locals>.<genexpr>G  $      DDe5>DDDDDDr&   c              3   $   K   | ]}|j         V  d S r.   r   )r5   rF   s     r%   ry   z2FunctionEvent.device_time_total.<locals>.<genexpr>G  s>       K K-/B(K K K K K Kr&   c              3   $   K   | ]}|j         V  d S r.   r.  r/  s     r%   ry   z2FunctionEvent.device_time_total.<locals>.<genexpr>L  r1  r&   )rJ   r   rK   r   rL   r   rz   r@   r?   CUDAPrivateUse1MTIArW   r   r+   s    r%   r   zFunctionEvent.device_time_total@  s    = 	 	1z~--> EDDt|DDDDDs K K373DK K K H H  
 DDt|DDDDDD#&(    
 ?--///r&   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     | j         S r.   r3  r+   s    r%   cuda_time_totalzFunctionEvent.cuda_time_totalU  s     %%r&   c                     | j         s| j        sdS | j        t          j        k    r&| j        t          d | j        D                       z
  S | j        t          j        t          j	        t          j
        fv sJ | j        S )Nr   c              3   $   K   | ]}|j         V  d S r.   r3  r!  s     r%   ry   z7FunctionEvent.self_device_time_total.<locals>.<genexpr>b  s6       0 0,1'0 0 0 0 0 0r&   )rJ   r   rK   r   rL   r   rz   r?   r5  r6  r7  r+   s    r%   r   z$FunctionEvent.self_device_time_total]  s    = 	 	1z~--)C 0 0595F0 0 0 - -   #&(    
 ))r&   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     | j         S r.   r   r+   s    r%   r   z"FunctionEvent.self_cuda_time_totalm  s     **r&   c                     | j         S r.   r   r+   s    r%   rP   zFunctionEvent.keyu  s
    yr&   c                    | j         }| j        }| j        }d                    g d| j         d| j         d| j         d| j         d| j         d| j	        j
         d| j	        j         d	t          d
 | j        D                        d| d| d| j         d| j         dt          | j                   d| j         d| d| d| j         d| j         d| j         d| j         d          S )Nr   z<FunctionEvent id=z name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=c                     g | ]	}|j         
S r4   )r  r!  s     r%   r9   z*FunctionEvent.__repr__.<locals>.<listcomp>  s     I I Ie I I Ir&   r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r   r  joinr  r>   rK   rR   r   rW   rX   rY   r   r?   rO   r   r  rJ   r   rp   r   )r!   r   r   r  s       r%   __repr__zFunctionEvent.__repr__y  s   o*"6y y y y y y y y y y	 y y y yHX y y y ycgco y y y y)y y y y59_5Jy y y yTXTcTgy y y y I It7H I I IJJy y y yMXy y y y`ky y y y Iy y y y (,{y y y y CFdFWBXBXy y y y !% 5	y y y y 9D	y y y y Tg	y y y y
 y y y y
 37.y y y y
 KOJZy y y y
 hlguy y y y y y	
r&   )r   r   r   r   r   rL   r   r  r^   r_   r   r"  r%  r   r  r(  r   rw   r   r9  r   r   rP   rH  r4   r&   r%   r   r     s%       88 N 5<' <' <' <'|< < <	( 	( 	(
! 
! 
! 
 
 X
 
 
 X
 ZY  - -	  X
-   X 
 
 X
 0 0 X0( ZK  & &	  X
& * * X* ZU  + +	  X
+   X
 
 
 
 
r&   r   c                   ,    e Zd ZdZddZd Zd Zd ZdS )	r   z:Used to average stats over multiple FunctionEvent objects.r   Nc                 N   d | _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _	        d | _
        d | _        d | _        d| _        d| _        d| _        d| _        d | _        d | _        t&          j        | _        d| _        d| _        d S )Nr   F)rP   r   rR   rJ   r   r   r   r   rw   r   r   ro   rm   r  r  r"  r%  r?   r=   r   rL   rK   r   r  r+   s    r%   r   zFunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;%)
$(
%&() *+"-.%;?37'1~$


r&   c                    | j         |j         | _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        |j        | _        |j        | _        t          |t          t          f          sJ |j         | j         k    sJ | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | j        |j        | _        n|j        | xj        |j        z  c_        | S r.   )rP   rR   rJ   r   r=   r?   r   ro   rm   rK   r   r   r   r  r   r   r   r   rw   r   r  r  r"  r%  r   r  r!   others     r%   rA   zFunctionEventAvg.add  s   8 yDH =DL!NDM"_DN#.DO % 2D % 2DDJDJ$0D"_DN#.DO&+&>D#%-1A!BCCCCCyDH$$$$u33%"99  E$==  ##u'CC##!77  E$==  ""e&AA""%%)GG%%

ek!

:DJJ[$JJ%+%JJr&   c                 ,    |                      |          S r.   )rA   rL  s     r%   __iadd__zFunctionEventAvg.__iadd__  s    xxr&   c                     | j         sdn| j         }| j        }| j        }| j        }d| j         d| j         d| j         d| d| d| d| dt          | j                   d	| j	         d| d
| dS )Nr   z<FunctionEventAvg key=z self_cpu_time=r@  z  self_rB  r   rC  rD  rE  rF  )
r   r  r   r  rP   r  r   r   r   r  )r!   r   self_device_timer   device_memorys        r%   rH  zFunctionEventAvg.__repr__  s    $(OHff:*0dTX d dd>Z d dfjfw d d d d(8d d;Fd dNYd dilmqm~iid d $ 5d d8Cd dS`d d d	
r&   )r   N)r   r   r   r   r   rA   rO  rH  r4   r&   r%   r   r     s\        DD   0" " "H  	
 	
 	
 	
 	
r&   r   c                       e Zd Zd ZdS )r   c                     t          |          dk    rt          j                            |          n|| |<   | |         S rl   )r<   torch_C	_demangle)r!   rP   s     r%   __missing__zStringTable.__missing__  s:     033xx!||EH&&s+++S	Cyr&   N)r   r   r   rX  r4   r&   r%   r   r     s#            r&   r   c                       e Zd ZdZd Zd ZdS )r   z=Acceleration structure for accessing mem_records in interval.c                     || _         g | _        g | _        t          |          dk    r>t	          d t          |          D                       }t          | \  | _        | _        d S d S )Nr   c                 L    g | ]!\  }}|d                                           |f"S )r   )start_ns)r5   irs      r%   r9   z*MemRecordsAcc.__init__.<locals>.<listcomp>  s-    RRR41a1Q4==??A.RRRr&   )_mem_records_start_nses_indicesr<   r[   rB   zip)r!   mem_recordstmps      r%   r   zMemRecordsAcc.__init__  so    '&(#%{aRR9[;Q;QRRRSSC.13i+Ddmmm  r&   c              #      K   t          j        | j        |dz            }t          j        | j        |dz            }t	          ||          D ]}| j        | j        |                  V  dS )z
        Return all records in the given interval
        To maintain backward compatibility, convert us to ns in function
        i  N)bisectbisect_leftr`  bisect_rightr;   r_  ra  )r!   r  r  	start_idxend_idxr]  s         r%   in_intervalzMemRecordsAcc.in_interval  s{      
 &t'7DII	%d&6FFy'** 	6 	6A#DM!$455555	6 	6r&   N)r   r   r   r   r   rk  r4   r&   r%   r   r     s8        GG8 8 86 6 6 6 6r&   r   c                 B     g d}t           fd|D                       S )N))autograd/__init___make_grads)rm  backward)ztorch/tensorro  )_internal/common_utilsprof_callable)rp  prof_func_call)rp  prof_meth_callc              3   B   K   | ]}|d          v o	|d         v  V  dS )r   r2   Nr4   )r5   r   r   s     r%   ry   z&_filter_stack_entry.<locals>.<genexpr>  s;      OOAaDEM3adem4OOOOOOr&   )all)r   filtered_entriess   ` r%   _filter_stack_entryrw    s;       OOOO>NOOOOOOr&   z[memory]z[OutOfMemory]c                 2    t           t          ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r>   filtered_out_namess     r%   _filter_namer|    s2     	 *.)	 %%%r&   Fc                 d    t                      }||          } |r|                     d          rd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r>   with_wildcardstring_tables      r%   _rewrite_namer    s;    ==LD #???++ 	#"DKr&   r{   r|   r}   r~   c
                 \  ,-./0 t          |           dk    rdS t          d | D                       }
t          d | D                       }| d         j        }|s|
rt          d          t          d | D                       }&t	          t          | fdd	
          |||          } t          d | D                       dz   }|t          ||          }t          d | D                       dz   }|t          ||          }d}|}d}g }| D ];}|j        2t          |j                  dk    r|	                    |j                   <t          |          dk    }|r.t          d |D                       dz   }|t          ||          }g d}||
                                nd}|
r&|                    d| d| d| d| dg           |r9|                    ddg           |r |r|                    | dd| dg           |	                    d           t          d | D                       }|r|	                    d           d,dg0dg-, g.d2,-.0fd	}d  } ||           |d!d         D ]} ||           |r |	                    d"            ||           |r"|	                    d#            ||d$%           |rg }| D ]'}|j        dk    r|	                    |j                   (t          |          dk    r? |t          |                    \  }}|	                    d&|             ||           nd'}0d         }-d         } .d         }!d}g //fd(}"d}#d}$| D ]n}|#|j        z  }#|j        t          j        k    r|j        r|$|j        z  }$3|j        t          j        t          j        t          j        fv r|j        s
|$|j        z  }$o| |"d)|!z              |"|           |	r |"d)|!z              |"d*            |"|             |" |j        |             |"|            d+ }%d}&| D ]}|&|k    r n|	r|j        |&d!z  }&|j        }'|&t          |'          |d,z
  k    r|'d|d,z
           d-z   }'t5          |j        |#          |_        |j        st5          |j        |#          nd|_        |'|j        |j        |j        |j         |j!        g}(|
rGt5          |j        |$          |_"        |(                    |j#        |j"        |j$        |j%        g           |rz|(                    tM          |j'                  tM          |j(                  g           |r=|r;|(                    tM          |j)                  tM          |j*                  g           |(	                    |j+                   |r|(	                    |j,                   |r/|(	                    t[          |j.                  d|                    |r@|j        dk    r|(	                    d.           n|(	                    |j        |z  d/           |rFd})t          |j                  dk    r |%|j        d         |          })|(	                    |)            |" |j        |(            |rqdgt          |          d!z
  z  }*|j        d!d         D ]#}+ |" |j        |* |%|+|          gz               $|*	                    d            |" |j        |*             |"|             |"d0t_          |#                      |
r4 |"d||
                                nd d1t_          |$                      d0                    /          S )3zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   ,   K   | ]}|j         d k    V  dS r   Nr=  rx   s     r%   ry   z_build_table.<locals>.<genexpr>0  s*      OOu%6:OOOOOOr&   c              3   ,   K   | ]}|j         d k    V  dS r  r'  rx   s     r%   ry   z_build_table.<locals>.<genexpr>1  s*      PP7!;PPPPPPr&   z9use_device is None, but there is device performance data.c              3   X   K   | ]%}|j         d uot          |j                   dk    V  &d S r   )r   r<   rx   s     r%   ry   z_build_table.<locals>.<genexpr>9  sS         
	4	'	GC0B,C,Ca,G     r&   Nc                     t          |                     dd                              dd                              dd                    S )Nr   r   r   r   )r   r   )rN   r   s    r%   rU   z_build_table.<locals>.<lambda>B  sC    OOFH55WUH--W]H55	! ! r&   T)rP   reverser   c              3   >   K   | ]}t          |j                  V  d S r.   )r<   rP   rM   s     r%   ry   z_build_table.<locals>.<genexpr>O  s*      ;;SCLL;;;;;;r&      c              3   X   K   | ]%}t          t          |j                            V  &d S r.   )r<   r   r   rM   s     r%   ry   z_build_table.<locals>.<genexpr>S  s5      KKSc#c&6"7"788KKKKKKr&      c              3   H   K   | ]}t          d  |D                       V  dS )c              3   4   K   | ]}t          |          V  d S r.   r<   )r5   r   s     r%   ry   z)_build_table.<locals>.<genexpr>.<genexpr>b  s(      225CJJ222222r&   N)max)r5   ro   s     r%   ry   z_build_table.<locals>.<genexpr>b  s9      GGu22E22222GGGGGGr&   )Namez
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   ,   K   | ]}|j         d k    V  dS )rZ   N)rR   rM   s     r%   ry   z_build_table.<locals>.<genexpr>  s)      ==s*======r&   zNode IDr   rF  c                     dxx         d|z   t          |           z   dz   dz  z   z  cc<   dxx         d| z  dz  z   z  cc<   dxx         | z   z  cc<   d S )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r%   
add_columnz _build_table.<locals>.add_column  s    qHs7||+c1S<5GH	
 	qS7]cL.@AAg44r&   c                 d   g d}| dk    sJ t          dt          t          j        |           dz  t	          t          |          dz
                                }|dk    r|t          |          k     sJ t          dt          j        |          dz            |t          |                   fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r2   
   g      )	r  minmathlog10floatr<   powfloorr   )r  flop_headers	log_flopss      r%   auto_scale_flopsz&_build_table.<locals>.auto_scale_flops  s    
 
 
 qyyyy3tz%0014eC<M<MPQ<Q6R6RSSTT	A~~)c,.?.?"?"?"?"?BI..577c)nn9UVVr&   r2   zInput ShapeszSource Location<)r  zTotal Fc                 \                         |                                 d           d S )Nr   )r`   )r   results    r%   r`   z_build_table.<locals>.append  s-    adr&   =z1This report only display top-level ops statisticsc                     t          |           |k    r<t          |           |z
  }| |d          } t          |           dk    rd| dd          z   } | S )Nr  ...r  )r   src_column_widthoffsets      r%   	trim_pathz_build_table.<locals>.trim_path  sX    t99'''YY!11F=D4yy1}}tABBx'r&   r  r  z--z8.3fzSelf CPU time total: z time total: )rF  )1r<   anyr   RuntimeErrorr   r[   r  r  ro   r`   upperrD   r  rw   rK   r   rL   r   r   r5  r6  r7  r   r   r=   rP   r   r  rJ   r   r  r  r  r   r  r  r  r   r   r  r"  r  r%  r   rR   r   r   r   rG  )1rb   r   r   r   r   r   r   r   r   r   has_device_timehas_device_memr   has_input_shapesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  stacksrN   	has_stackheadersr   append_node_idr  r  _	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthr`   sum_self_cpu_time_totalsum_self_device_time_totalr  event_limitr>   
row_values	src_fieldempty_headersr   r  r  r  r  r  s1    `                                          @@@@@r%   r   r      s
    6{{arOOOOOOOOPPPPPPPN%J  X/ XVWWW      
     	 	 	 ")!
 
 
  ;;F;;;;;a?( 13HIIKKFKKKKKaO*!"57NOO-F % %9 S^^a%7%7MM#)$$$FaI KGGGGGGG!K 	  +"#35IJJ  G )3(>*""$$$FK 
%%%''''&&&)))		
 	
 	
  	
 	
 	
  	. 	NN"(((-K---   NN<   ==f=====N "y!!! LTNTN$}oO5 5 5 5 5 5 5 5 5W W W J !!!QRR[ ) )
'(((( (~&&&
&''' 3()))
#c2222 	 	, 	,Cy1}}  +++y>>Q*:*:3y>>*J*J'[,NN2L22333J)****J"J"J!!$KJ F      !" E E3#::?jn,,,&#*DD&&O&  * '#*DD& s[ !!!v Ds[ !!!BCCC
F:
F:g&'''
F:   K X6 X6)##E  	S^%?1Kw ,T>SVW>W1W1W501456>D1#%< 
  

 <s13JKKK 	  '!"	

  	'9*,F( (C$ 2,-'    	 #3#788"3#<==	    n !! 's'>??&s'CDD	   	I	
 	
 	
  	+ck*** 	Kc#"2334H5H4HIJJJ 	EyA~~!!$''''!!SY%<"C"CDDD 	)I39~~!!%Icil4DEE	i((( z *-... 		6DCLL1$45M122  %J%'99U<L+M+M*NN   
   $$$F$:$m4555
F:
FJ<0G#H#HJJKKK 
F**@J$$&&&f F F'(BCCF F	
 	
 	
 776??r&   )F)	NNr{   r|   r}   r~   FFF)'rf  r\   r  collectionsr   r   operatorr   typingr   r   r   r	   r
   typing_extensionsr   rU  torch.autogradr   __all__listr   r   r   r   r   r   r   r   r   r   r   r   rw  ry  rz  r|  r  r   r4   r&   r%   <module>r     s         / / / / / / / /       3 3 3 3 3 3 3 3 3 3 3 3 3 3 ( ( ( ( ( (  % % % % % %	 	 	K K K K K K K K\
  5 5 5" " "D D D               <	% 	% 	% 	% 	% 	% 	% 	% 
H<<<	=	=I
 I
 I
 I
 I
' I
 I
 I
XK
 K
 K
 K
 K
* K
 K
 K
\    +   6 6 6 6 6 6 6 6,	P 	P 	P  * & & &&    v v v v v vr&   