
    Χgy6                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ e
rd dlmZ d d	fd
efdZ e j        ed d          Z e j        ed d	          Ze G d d                      Ze G d d                      Z G d d          Z G d d          ZddZd d dfdZd Zd ZdS )    N)deque)	dataclass)DictListTYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     | j         S N)childrenxs    Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/profiler/_utils.py<lambda>r      s    1:     Freversec              #      K   |rt           nd }t           ||                     }|r? ||          }|V   | ||                    D ]}|                    |           |=d S d S )Nc                     | S r    r   s    r   r   z_traverse.<locals>.<lambda>   s    q r   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser!      s      0HH[[EeeDkk""I
 *WY''
 5Z!8!899 	* 	*K[))))	  * * * * *r   c                 *    |                                  S r   )popr   s    r   r   r      s    aeegg r   T)r   r   c                 *    |                                  S r   )popleftr   s    r   r   r      s     r   c                   ^    e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   e	d             Z
dS )EventMetricsr   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 :    | j         dk    rdS | j        | j         z  S )Nr   g        )r(   r*   selfs    r   fraction_idle_timezEventMetrics.fraction_idle_time(   s&     A%%3 4#888r   N)__name__
__module____qualname__r(   int__annotations__r)   r*   r+   propertyr/   r   r   r   r'   r'   !   sp         cL#L#K9 9 X9 9 9r   r'   c                   2    e Zd ZU eed<   eed<   dZeed<   dS )Intervalstartendr   r+   N)r0   r1   r2   r3   r4   r+   r   r   r   r7   r7   /   s4         JJJ	HHHKr   r7   c                   >    e Zd Zd Zd Zd Zd Zdee         fdZ	dS )EventKeyc                     || _         d S r   event)r.   r>   s     r   __init__zEventKey.__init__7   s    


r   c                 4    t          | j        j                  S r   )hashr>   idr-   s    r   __hash__zEventKey.__hash__:   s    DJM"""r   c                 6    | j         j        |j         j        k    S r   )r>   rB   )r.   others     r   __eq__zEventKey.__eq__=   s    z}..r   c                     | j         j         S r   )r>   namer-   s    r   __repr__zEventKey.__repr__@   s    */##r   	intervalsc                 b   d}t          |d           }|rXt          | j        j        |d         j                  }t          | j        j        |d         j                  }||k     r|||z
  z  }d\  }}|t          |          k     r||         }||         }|dz  }|j        |j        k    r$|j        |j        k    r|dz  }N|j        |_        |}t          | j        j        |j                  }t          | j        j        |j                  }||k     r|||z
  z  }|t          |          k     |S )Nr   c                     | j         S r   r8   r   s    r   r   z,EventKey.intervals_overlap.<locals>.<lambda>E   s    AG r   key)r      rP   )	sortedmaxr>   start_time_nsr8   minend_time_nsr9   len)	r.   rJ   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapzEventKey.intervals_overlapC   sK   9*;*;<<<	 	<
 8)A,:LMMMdj4il6FGGK{**m ;;1#i..  %aLM%aLMFA =#666 $}'888FA*7*;M'A
 8-:MNNMdj4m6GHHK{**m ;;! #i..  $ r   N)
r0   r1   r2   r?   rC   rF   rI   r   r7   r^   r   r   r   r;   r;   6   so          # # #/ / /$ $ $4>      r   r;   c                   D    e Zd ZdefdZd Zd Zd Zd Zdd	e	d
e
fdZdS )BasicEvaluationprofc                 N   || _         i | _        |                                  t          d | j                                        D             d           | _        d | j        D             | _        g | _        |                                 | _	        | 
                                 d S )Nc              3      K   | ]}|V  d S r   r   .0es     r   	<genexpr>z+BasicEvaluation.__init__.<locals>.<genexpr>j   s"      ,,1Q,,,,,,r   c                     | j         j        S r   )r>   rS   r   s    r   r   z*BasicEvaluation.__init__.<locals>.<lambda>j   s    AG<Q r   rN   c                     g | ]	}|j         
S r   r=   rd   s     r   
<listcomp>z,BasicEvaluation.__init__.<locals>.<listcomp>l   s    8881qw888r   )r	   metricscompute_self_timerQ   keys
event_keyseventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r.   ra   s     r   r?   zBasicEvaluation.__init__e   s    57    ,,))++,,,2Q2Q
 
 
 98888/1 $ 8 8 : :     r   c                    | j         j        J t          | j         j                                                  }|r|                                }|j        }|j        D ]!}||j        z  }|                    |           "t          |          | j	        vsJ d|j
         d|j                     t          |          | j	        t          |          <   |j        | j	        t          |                   _        |dS dS )zM
        Computes event's self time(total time - time in child ops).
        NzDuplicate id: z, )r)   )r	   kineto_resultsr   experimental_event_treer#   r(   r   r   r;   rk   rB   rH   r'   )r.   stackr   	self_timer    s        r   rl   z!BasicEvaluation.compute_self_timeq   s    |*666dl1IIKKLL  	=J"3I)2 * *[99	[))))$$DL888B
BBBB 9881=91U1U1UDL*--. ",!< L$$  	= 	= 	= 	= 	=r   c                    | j         j        J | j         j                                        }d d t          fd|D             d           }t          fd|D             d           }t          ||z   d	           | _        i }d
}|D ]"t          |fd|          }||<   ||n|}#d
}d}||z   | j        z   }	d }
g }|	                    |
           |	D ]}t          |d          rW|                                dz  }|                                |	                                z   dz  }||v r||         ||         }t          |d          rR|
                                }|
                                |                                z   }||v r||         ||         }nt          |d          r|j        }|j        }|t          |          k     rT||         
                                |k    r6|dz  }|t          |          k     r||         
                                |k    6||z
  dz   }t          |d
          }t          |d          st          |d          r&|                    t#          |||                     t          |d          r|| j        t'          |                   _        |S )z
        Computes queue_depth at each event. This will calculate the queue depth data for
        All the events in the tree.
        This will return a list of Interval of queue depth data of cuda launch and kernels.
        Nc                     | j         dk    S )NcudaLaunchKernel)rH   rf   s    r   is_cuda_launch_kernelzBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s    6///r   c                 |    |                                  t          j        k    od| j                                        vS )Nmem)device_typer
   CUDArH   lowerr|   s    r   is_cuda_kernelz;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   s+    ==??jo5U%qv||~~:UUr   c              3   2   K   | ]} |          |V  d S r   r   )re   rf   r}   s     r   rg   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s4      DD1+@+@+C+CDQDDDDDDr   c                 *    |                                  S r   start_nsr   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>       !**,, r   rN   c              3   2   K   | ]} |          |V  d S r   r   )re   rf   r   s     r   rg   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s2      ==1>>!+<+<=Q======r   c                 *    |                                  S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   r   r   c                 *    |                                  S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s    1::<< r   r   c                 X    |                                                                   k    S r   )linked_correlation_id)r   cuda_launch_events    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s'    !1133$::<<= r   rM   c                     t          | d          r|                                 dz  S t          | d          r|                                 S t          | d          r| j        S t	          d          )Nstart_us  r   rS   zUnknown Event Type)hasattrr   r   rS   	Exceptionr=   s    r   new_old_event_comparatorzEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   su    uj)) /~~''$..uj)) (~~'''uo.. +**0111r   r   r   r   rS   rP   )r	   ru   ro   rQ   rp   index_of_first_matchsortr   r   duration_usr   duration_nsrS   rU   rV   rR   r   r7   rk   r;   r+   )r.   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   rr   r>   
start_timeend_timecurrent_queue_depthr   r   r}   s                   @@@r   rq   z#BasicEvaluation.compute_queue_depth   s    |*666,5<<>>	0 	0 	0	V 	V 	V $DDDDDDD&&
 
 
 $=======&&
 
 

 "!339O9O
 
 
 35!3 	T 	T("= = = =(	  E 16N,-*/*;AS !'*<<t{J
	2 	2 	2 ,.4555  	P  	PEuj)) A"^^--4
!NN,,u/@/@/B/BBdJN**~e/D/P+9%+@(uj)) -"^^--
 >>++e.?.?.A.AAN**~e/D/P+9%+@(00 -"0
 , %s+='>'>>>'(<=FFHH  %)$	 %s+='>'>>>'(<=FFHH  #79M"MPQ"Q"%&91"="=uj)) PWUJ-G-G P ''Z3FGG    00 P<OXe__-9r   c                 p   d}d}g }| j         rj| j        rc|t          | j        d         j        | j         d         j                  t          | j         d         j        | j        d         j                  gz  }| j         D ]O}|j        dk    r|s	|j        }d}|j        dk    r,|r*|                    t          ||j                             d}Pd | j	        
                                D             }|D ]A}t          |                              |          | j	        t          |                   _        BdS )z4
        Computes idle time of the profile.
        Fr   r   Tc                     g | ]	}|j         
S r   r=   rd   s     r   rj   z5BasicEvaluation.compute_idle_time.<locals>.<listcomp>   s    ;;;!ag;;;r   N)rr   ro   r7   rS   r8   r9   rU   r+   r   rk   rm   r;   r^   r*   )r.   idle
idle_startidle_intervals
data_point
event_listr>   s          r   rs   z!BasicEvaluation.compute_idle_time   sU   
 
)+  	T[ 	Q5t7LQ7O7UVV.r26B8STT N
 / 	 	J%**4*'^
%))d)%%hz:;K&L&LMMM;;t|'8'8':':;;;
 	0 	0E9A: :// L%)66	0 	0r   c                 l    ddl }t          t           j                            }d |D             }dd}g d}|t	          |          k     r||         k    r|dz  }%t          |dz   t	          |                    D ]x}t          |fd|          }t          |||          }	|	M||	         |k    rA                    t          ||	         j
        ||         j
                             ||n|} ny|dz  }|t	          |          k     Èfd	 j                                        D             }
|
r|                     fd
|
D             |j                  }|                     fd|
D             |j                  }||                    |          z
  |                    |          z  }||                    |          z
  |                    |          z  }|d|z  z   }d t#          t%          ||
          t'          j        d          d          D             }
|
d|         }
|
S )a  
        Filter and Rank the events based on some heuristics:
        1) Events that are in the falling phase of the queue depth.
        2) Events that have a high idle_time, self_time difference.

        Parameters:
            length: The number of events to return.
        r   Nc                     g | ]	}|j         
S r   )r+   rd   s     r   rj   z/BasicEvaluation.rank_events.<locals>.<listcomp>  s    ===qQ]===r      rP   c                     | k    S r   r   )r   bottom_threasholds    r   r   z-BasicEvaluation.rank_events.<locals>.<lambda>  s    .?)? r   rM   )r8   r9   c                 >    g | ]}|                               |S r   )r^   )re   r>   decrease_intervals     r   rj   z/BasicEvaluation.rank_events.<locals>.<listcomp>,  s>     
 
 
&&'899

 
 
r   c                 4    g | ]}j         |         j        S r   )rk   r)   re   r>   r.   s     r   rj   z/BasicEvaluation.rank_events.<locals>.<listcomp>3  s#    JJJee$1JJJr   )dtypec                 4    g | ]}j         |         j        S r   )rk   r/   r   s     r   rj   z/BasicEvaluation.rank_events.<locals>.<listcomp>7  s#    PPPEe$7PPPr   g333333?c                     g | ]\  }}|S r   r   )re   _r>   s      r   rj   z/BasicEvaluation.rank_events.<locals>.<listcomp>?  s,       Au   r   T)rO   r   )torchlistr   rr   rV   ranger   argmaxr   r7   r8   rk   rm   tensorfloat32meanstdrQ   zipoperator
itemgetter)r.   lengthr   rr   	qd_valuestop_threasholdrZ   r[   next_minimum_idxpeak_idxr   rx   	idle_timenormalized_gainnormalized_selfheuristic_score_listr   r   s   `               @@r   rank_eventszBasicEvaluation.rank_events  s    	)> ? ?@@==,<===	#i..  |///Q1q5#i..11   $8????q$ $ $  ")1:JKKK 'Ih,?>,Q,Q%,, ,X6<>Nq>Q>W   
 -=,H((aAEFA+ #i..  .
 
 
 
**,,
 
 


  	-JJJJzJJJm %  I PPPPZPPPm %  I  )5::i+@+@@EIIiDXDXXO(5::i+@+@@EIIiDXDXXO#2S?5J#J   &,j99 +A.. ! ! !  J $GVG,Jr   rP   Tr   print_enablec                                            |          }|s|S |rdnd}|d                     fd|D                       z  }|rt          |           |S )NzOptimizable events:
zNo events to optimize

c                 |    g | ]8}d  d| dt          |j                   dj        |         j        dz  ddd  	9S )zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: d   z.2fz%
)source_code_locationr>   rk   r/   r   s     r   rj   z:BasicEvaluation.get_optimizable_events.<locals>.<listcomp>Q  s            +EK88  |E*=C   	   r   )r   joinprint)r.   r   r   r   outputs   `    r   get_optimizable_eventsz&BasicEvaluation.get_optimizable_eventsJ  s    %%f--
 	,6U((<U$))    (  	
 	
 		
  	&MMMr   N)rP   T)r0   r1   r2   r	   r?   rl   rq   rs   r   r3   boolr   r   r   r   r`   r`   d   s        
!W 
! 
! 
! 
!= = =,\  \  \ |0 0 08G G GR S D      r   r`   c                     ||t          |           k    rt          |           }t          ||          D ]} || |                   r|c S d S r   )rV   r   )seq	predicater8   r9   rZ   s        r   r   r   _  sb    
{cSXXoo#hh5#  9SV 	HHH	4r   c                     | S r   r   r   s    r   r   r   h  s    a r   c                     | ||         } t          |           dk    rd S |                     t          | |                    |z   S )Nr   rN   )rV   r   rR   )r   rO   r8   r9   s       r   r   r   h  sG    
eCi.C
3xx1}}t99S#&&&''%//r   c                 `    | +t          j        d| j                  }|| j        } &| j        S dS )Nz
\.py\(.*\)zNo source code location found)researchrH   parent)r>   matchs     r   r   r   o  s8    

	-44=LEz**r   c                  Z    ddl m}   |             5  	 d d d            d S # 1 swxY w Y   d S )Nr   r   )torch.autograd.profilerr	   r   s    r   _init_for_cuda_graphsr   }  s    //////	                   s    $$)r   N)	functoolsr   r   collectionsr   dataclassesr   typingr   r   r   r   r	   torch.profilerr
   torch.autogradr   r   r!   partialtraverse_dfstraverse_bfsr'   r7   r;   r`   r   r   r   r   r   r   r   <module>r      s/        				       ! ! ! ! ! ! , , , , , , , , , , + + + + + + % % % % % %  ,++++++ *>)=u * * * * * * !y 4E4EtTTT y ,,e  
 
9 
9 
9 
9 
9 
9 
9 
9        + + + + + + + +\x x x x x x x xv     Kqd 0 0 0 0+ + +    r   