
    קg2                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlmZ d dlmZ erd dlmZ d ad ad ag ad	ed
<   g Zded<   d ad aej         G d d                      Zg aded<   d a d a!d a"d Z#e G d d                      Z$d Z% G d d          Z&i Z'ded<   e G d d                      Z(e()                    dg d           e()                    dg d           e()                    dg d            e()                    d!g d"           e()                    d#g d$           d% Z*d& Z+d' Z,d( Z-d) Z.d* Z/d+ Z0d, Z1d- Z2d. Z3d/ Z4e	d5d2            Z5d3 Z6d4 Z7dS )6    )annotationsN)	dataclass)	lru_cache)DictListSetTupleTYPE_CHECKING)config)get_benchmark_name)BaseSchedulerNodez#List[Tuple[BaseSchedulerNode, int]]nodes_num_elemz%List[Tuple[BaseSchedulerNode, float]]node_runtimesc                  (    e Zd ZU ded<   dZded<   dS )CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        S/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_inductor/metrics.pyr   r   (   s0                r   r   zList[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsc                     da dadat                                           t
                                           dadat                                           da	da
dad S )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingr   r   r   resetr'   8   sp     %&"%++--- !01-r   c                  P    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   d	S )
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r   r"   r#   r    r%   N)r   r   r   __doc__r   r   r   r   r)   r)   P   sf          
  ''''222222r   r)   c                 H    d t          j        t                    D             S )Nc                    g | ]	}|j         
S r   name).0fields     r   
<listcomp>z%get_metric_fields.<locals>.<listcomp>`   s    LLL5EJLLLr   )dataclassesfieldsr)   r   r   r   get_metric_fieldsr4   _   s"    LLK$67J$K$KLLLLr   c                  :    e Zd ZdZd
dZddZedd            Zd	S )CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    returnNonec                n    i | _         t                      D ]}t                      |         | j         |<   d S N)cached_metricsr4   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__j   sB     ')) 	< 	<F*1))F*;D''	< 	<r   r)   c                    i }t                      D ]'}t                      |         | j        |         z
  ||<   (t          di |S )Nr   )r4   r<   r;   r)   )r=   delta_metricsr>   s      r   
get_deltaszCachedMetricsHelper.get_deltaso   sU    ')) 	T 	TF$+IIf$58KF8S$SM&!!"33]333r   deltac                |    t                      D ],}t                      |xx         t          | |          z  cc<   -d S r:   )r4   r<   getattr)rC   r>   s     r   apply_deltasz CachedMetricsHelper.apply_deltasv   sN    ')) 	8 	8FIIf!7!77	8 	8r   N)r7   r8   )r7   r)   )rC   r)   )r   r   r   r*   r?   rB   staticmethodrF   r   r   r   r6   r6   c   sf         < < < <
4 4 4 4 8 8 8 \8 8 8r   r6   zDict[str, MetricTable]REGISTERED_METRIC_TABLESc                  `    e Zd ZU ded<   ded<   dZded<   d Zd	 Zd
 Zd Ze	d             Z
dS )MetricTablestr
table_namez	List[str]column_namesr   r   num_rows_addedc                R   | j         t                      vrd S  |            t          | j                  t                    k    s.J t          | j                   dt                                 t	          | j                  t	                                                    k    s@J t	          | j                   dt	                                                                 t                      g}|fd| j        D             z  }|                     |           d S )Nz v.s. c                     g | ]
}|         S r   r   )r/   column_namerow_dicts     r   r1   z'MetricTable.add_row.<locals>.<listcomp>   s    KKK+%KKKr   )rL   enabled_metric_tableslenrM   setkeysr   
_write_row)r=   row_fnrowrR   s      @r   add_rowzMetricTable.add_row   sC   ?"7"9"999F6884$%%*
 *
 
 
 
$#$$;;CMM;;
 
 
 4$%%MMOO*
 *
 
 
 
$#$$BBC,@,@BB
 
 

   
 	KKKK9JKKKKr   c                    d| j          dS )Nmetric_table_z.csv)rL   )r=   s    r   output_filenamezMetricTable.output_filename   s    4t4444r   c                    |                                  }t          |d          5 }t          j        |d          }|                    dg| j        z              d d d            d S # 1 swxY w Y   d S )Nw
lineterminator
model_name)r]   opencsvwriterwriterowrM   )r=   filenamefdrf   s       r   write_headerzMetricTable.write_header   s    ''))(C   	@BZ4888FOO\NT->>???	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@s   5A''A+.A+c                   |                                  }| j        dk    r3t          j                            |          s|                                  | xj        dz  c_        t          |          D ]+\  }}t          |t                    r|d}n|d}n|}|||<   ,t          |d          5 }t          j        |d          }|                    |           d d d            d S # 1 swxY w Y   d S )Nr      z.6f ar`   ra   )r]   rN   ospathexistsrj   	enumerate
isinstancefloatrd   re   rf   rg   )r=   rY   rh   idxorig_valnew_valri   rf   s           r   rW   zMetricTable._write_row   sF   ''))!##BGNN8,D,D#q &s^^ 	 	MC(E** #%++!"CHH(C   	!BZ4888FOOC   	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!s   .,C''C+.C+c                :    t          | |          }|t          | <   d S r:   )rJ   rH   )r.   rM   tables      r   register_tablezMetricTable.register_table   s"    D,//). &&&r   N)r   r   r   r   rN   rZ   r]   rj   rW   rG   rz   r   r   r   rJ   rJ      s         OOON  $5 5 5@ @ @! ! !( / / \/ / /r   rJ   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)kernel1_namekernel2_namer}   r   
size_hintsreduction_hintspeedup'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    ddl m} ddlm} |                    |           } ||          }t          j        |j        j                  S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rl   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     sc     '&&&&&444444


-
.
.Cs##F VY\***r   c                D    t          |                                           S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rT   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder     s     $//11222r   c                ~    |dk    rd S t          j        d|           }|s
J d            |                    d          S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rl   researchgroup)r   r   ms      r   _parse_size_hintsr   #  sJ    )##t
	/1CDDA######771::r   c                z    | dvrd S t          j        d|          }|s
J d            |                    d          S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rl   r   )r   r   r   s      r   _parse_reduction_hintr   ,  sK    CCCt
	9;MNNA??????771::r   c                ,    |                      |          S r:   )count)r   patterns     r   _count_patternr   4  s     &&w///r   c                   |                                  d         }|                    d          sJ |                    d          }|                    d          }||dz   |         }|                    d          }t	          |          S )Nr   def (z):rl   ,)r   
startswithindexsplitrT   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   8  s    $//11!4Hv&&&&&s##InnT""G	A/0HNN3Eu::r   c                @    |                      d          }| |d         S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   B  s&     $$V,,I)**%%r   c                ~    t          j        | d|           }|r"t          |                    d                    S d S )Nz
 = ([\d]+)rl   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   J  s@    
	^0002GHHA 1771::tr   c                z    t          j        d|           }|r"t          |                    d                    S 	 dS )z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rl   N)r   r   rt   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   R  sB    
 		1>BBA 
QWWQZZ   	 tr   c           
     *   	 ddl m}  ||          t          |          t          |          	t	          |          t                    t                    t          d                               	fd           dS )z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rl   )"get_kernel_category_by_source_coder   c                 "    t          d          t          d          t          d          t          d          t                    t          d          t          d          t          d          t                     dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>y  s    &&.$,/&'<iHH'(=zJJ*+@&II,-BOTT#$9::"#8(CC"#8(CC"#8(CC";# #
 
 r   N)	r   r   r   r   r   r   r   get_metric_tablerZ   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   e  s     FEEEEE889KLLO*?<NOON"#5GGJ*+=>>N8HH 55JKK&''//	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
    r   c                    t                                           D ]p\  } }| t                      v r[|                                }t          j                            |          rt	          j        |           |                                 qdS )z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rH   itemsrS   r]   ro   rp   rq   unlinkrj   )r.   ry   rh   s      r   purge_old_log_filesr     s     05577 ! !e(****,,..Hw~~h'' $	(###   ! !r   r7   Set[str]c                     t           j        } t                      }|                     d          D ]E}|                                }|s|t
          v sJ d| d            |                    |           F|S )Nr   zMetric table name z is not registered)r   rS   rU   r   striprH   add)
config_strenabledr.   s      r   rS   rS     s    -JeeG  %%  zz|| 	,,,,8888 -,,DNr   c                "    | t                      v S r:   )rS   r-   s    r   is_metric_table_enabledr     s    (****r   c                J    | t           v sJ d|  d            t           |          S )NzMetric table z is not defined)rH   r-   s    r   r   r     s3    ++++-RT-R-R-R+++#D))r   )r7   r   )8
__future__r   re   r2   r   ro   r   r   	functoolsr   typingr   r   r   r	   r
   torch._inductorr   torch._inductor.utilsr   torch._inductor.schedulerr   r   r   r    r   r   r   r"   r#   r   r   r$   r%   r&   r'   r)   r4   r6   rH   rJ   rz   r   r   r   r   r   r   r   r   r   r   r   rS   r   r   r   r   r   <module>r      s   " " " " " " " 



      				 				 ! ! ! ! ! !       8 8 8 8 8 8 8 8 8 8 8 8 8 8 " " " " " " 4 4 4 4 4 4  <;;;;;;  !"         8: 9 9 9 9     ! ! ! ! ! ! ! ! CE ! D D D D ,- )   0 3 3 3 3 3 3 3 3M M M8 8 8 8 8 8 8 82 46  5 5 5 5 8/ 8/ 8/ 8/ 8/ 8/ 8/ 8/v                        -	 	 	         6+ + + 3 3 3    0 0 0  & & &    &' ' 'T! ! !    + + +* * * * *r   