
    קg                   ^
   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZmZmZ d dl m!Z! d dlm"Z" d dl#m$Z$ d d	l%m%Z%m&Z& d d
l'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z: d dl;Z;d dl<m=Z> d dl;m?Z?m@Z@ d dlAmBZBmCZCmDZD d dlEmFZFmGZGmHZH d dlImJZJ d dlKmLZLmMZM d dlNmOZO ddlPmQZQ  e7d          ZRe6rd dlSmTZT ddlUmVZVmWZW 	 d dlXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZa d dlbmcZc d dldmeZemfZfmgZg d dlhmiZimjZjmkZk d dllmmZmmnZn d dlompZpmqZqmrZrmsZsmtZtmuZumvZv d dlwmxZx d dlymzZzm{Z{m|Z| d d l}m~Z~mZmZ e6rd d!lmZ d d"lmZ d d#lmZ d d$lmZmZ ej                            e          Zej                            ej                            e                    Zej                            ed%          Zej        d&k    Z eFj                    rd d'lmZ d d(lmZ d d)lmZmZmZmZ ndd/Zdd0Zdd1Zdd3Ze;j                            ed4          Zd5Zej        d&k    Z e
j        e          Zdd8Zdd9Z G d: d;          Z G d< d=e          Z G d> d?e          Zdd@ZddCZdddHZ	 dddMZ	 dddPZ	 	 	 dddRZddTZ	 	 dddYZej         G dZ d[                      Zdd]ZddcZddeZddgZddkZddmZ G dn doej                  ZdduZ ej        d          ddv            ZddwZej         G dx dy                      Z G dz d{e          Z G d| d}          ZddZddZddZddZ G d d          ZeZded<   ej         G d d                      ZddZ ej        d          dd            Zes G d d                      Z G d d          Zesej        dd                        ZddZddZddZdaded<   ddZes G d d                      ZddZes G d deצ                      Zes G d de٦                      Zes G d de٦                      ZddZddZes G d dæ                      Z G dĄ dŦ          Zߐd dǄZddȄZddɄZddʄZdd˄Z	 dddфZ G d҄ dӦ          Zes G dԄ dզ                      Zes G dք dצ                      Z G d؄ d٦          Z G dڄ de          Z G d܄ de          ZdS (      )annotationsN)bisect_right)copy)c_void_pCDLLcdll)	timedelta)partial)Path)timetime_ns)
ModuleType)AnyCallablecastCounterDict	GeneratorListNoReturnOptionalSequenceSetTupleTYPE_CHECKINGTypeVarUnion)	TypeAlias)SymIntTensor)countersdynamo_timedget_chromium_event_logger)configexcmetrics)cuda_env)rocm_compile_commandrocm_compiler)log_cache_bypass   )_alignT)KeysView)
JsonDataTyRemoteCache)	_set_gpu_runtime_env_transform_cuda_paths
CppBuilder
CppOptionsCppTorchCudaOptionsget_compiler_version_infoget_cpp_compiler&get_name_and_dir_from_output_file_pathnormalize_path_separator)pick_vec_isa)BoxedDeviceIndexCudagraphCachedInfo#log_cudagraph_skip_and_bump_counter)_module_to_triton_kernel_reload_python_module _reload_python_module_in_subproc)	cache_dirdefault_cache_dir)ALIGN_BYTESalign_inputs_from_check_idxs	BoxedBoolclear_on_fresh_inductor_cacheis_linux
is_windows"set_tracing_context_output_strides)trace_structured)extract_tensor_metadata
FakeTensorTensorMetadata)has_hinthint_intShapeEnv)FutureGraphLowering)ChoiceCaller)HalideInputSpec
HalideMetaz_inductor/script.ldwin32)build_paths)_run_build_command)log_global_cache_errorslog_global_cache_statslog_global_cache_valsuse_global_cacheargsr   kwargsreturnNonec                     d S N r^   r_   s     U/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_inductor/codecache.pyrZ   rZ              c                     d S rc   rd   re   s     rf   r[   r[      rg   rh   c                     d S rc   rd   re   s     rf   r\   r\      rg   rh   boolc                     dS NFrd   rd   rh   rf   r]   r]      s    urh   output_codeiX  namestrc                   t           j        j        dn'dt           j        j                            dd           }dt          j        j         t          j        j         }| d| }t          j	        
                    t                      |          }t          j	        
                    ||           }t          j        |d           |S )	Ncpucu. py_Texist_ok)torchversioncudareplacesysversion_infomajorminorospathjoinrA   makedirs)ro   cu_strpython_versionbuild_foldercpp_wrapper_dircpp_wrapper_build_directorys         rf   cpp_wrapper_cache_dirr      s     =% 	7%-$,,S"5577 
 K#*0J#2B2HJJN$//v//Lgll9;;==O"$',,"E"EK+d;;;;&&rh   c                 ,    t           j        j        dndS )N
cubin_path
hsaco_path)rz   r{   hiprd   rh   rf   get_cpp_wrapper_cubin_path_namer      s     =,4<<,Frh   c                     e Zd Ze ej        d          dd                        Zee ej        d          dd                                    Ze ej        d          dd                        Z	dd
Z
ddZddZdS )	CacheBaseNr`   Dict[str, Any]c                 x   	 ddl m}   |             }n# t          $ r d }Y nw xY w	 dd id|id}t          j                            t          j                                                  }t          j        j        +|j        |d         d<   t          j        j        |d         d<   n*|j	        |d         d<   t          j        j
        |d         d	<   n# t          t          f$ r i }Y nw xY wt          j        t          j        |d
                              d                                                    |d<   |S )Nr   )
triton_keyro   triton)devicer{   r   r{   r|   r   T)	sort_keysutf-8hash)triton.compiler.compilerr   ModuleNotFoundErrorrz   r|   get_device_propertiescurrent_devicer{   ro   gcnArchNamer   AssertionErrorRuntimeErrorhashlibsha256jsondumpsencode	hexdigest)r   triton_versionsystemdevice_propertiess       rf   
get_systemzCacheBase.get_system   si   	";;;;;; (Z\\NN" 	" 	" 	"!NNN	"	!4.n& &F !&
 @ @
))++! ! }!-+<+Ax (,1M,>y!&))+<+Hx (+0=+<y!%(- 	 	 	FFF	 !Jv...55g>>
 

)++ 	v s    ""B*C C'&C'r   c                     t          t          j                            t	                      dt
                                          d                             S )Ncacher   )r   r   r   r   rA   r   r   rd   rh   rf   get_local_cache_pathzCacheBase.get_local_cache_path   s9     BGLLgy7K7K7M7Mf7UVVWWWrh   Optional[Path]c                     t           j        Tt          t          j                            t           j        t                                          d                             nd S )Nr   )r$   global_cache_dirr   r   r   r   r   r   rd   rh   rf   get_global_cache_pathzCacheBase.get_global_cache_path   sI    
 &2 f5y7K7K7M7Mf7UVVWWW	
rh   ra   c                B    t                                           | _        d S rc   )r   r   r   selfs    rf   __init__zCacheBase.__init__   s    **,,rh   c                    |                                  }|                                si S t          |          5 }t          j        |          }d d d            n# 1 swxY w Y   |d         S Nr   )r   is_fileopenr   load)r   local_cache_pathlocal_cache_fplocal_caches       rf   get_local_cachezCacheBase.get_local_cache   s    4466'')) 	I"## 	4~)N33K	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	47##s   AA"Ar   c                    |                                  }t          t          |          t          j        | j        |dd          d           d S )N)r   r      )indentT	make_dirs)r   write_atomicrp   r   r   r   )r   r   r   s      rf   update_local_cachezCacheBase.update_local_cache   s`    4466 !!J$+DDQOOO	
 	
 	
 	
 	
 	
rh   r`   r   )r`   r   )r`   r   r`   ra   )r   r   r`   ra   )__name__
__module____qualname__staticmethod	functools	lru_cacher   rF   r   r   r   r   r   rd   rh   rf   r   r      s       Y" " "  \"H "YX X X  #" \X Y
 
 
  \
- - - -$ $ $ $
 
 
 
 
 
rh   r   c                      e Zd ZddZdd	Zd
S )
LocalCachekeysrp   r`   Optional[Dict[str, Any]]c                Z    |                                  }|}|D ]}||v r	||         } d S |S rc   )r   )r   r   r   	sub_cachekeys        rf   lookupzLocalCache.lookup	  sH    $$&&	 	 	Ce||!#J		ttrh   valuer   ra   c                   |                                  }|}|dd         D ] }|                    |i            ||         }!|||d         <   |                     |           d S )Nr   )r   
setdefaultr   )r   r   r   r   r   r   s         rf   	set_valuezLocalCache.set_value  sx    $$&&	": 	' 	'C  b)))!#II#	$r(&&&&&rh   N)r   rp   r`   r   )r   rp   r   r   r`   ra   )r   r   r   r   r   rd   rh   rf   r   r     s<        
 
 
 
	' 	' 	' 	' 	' 	'rh   r   c                  J    e Zd Z ej        d          dd            ZddZdS )PersistentCacheNr`   r   c                    |                                  }||                                si S t          |          5 }t          j        |          }d d d            n# 1 swxY w Y   |d         S r   )r   r   r   r   r   )r   global_cache_pathglobal_cache_fpglobal_caches       rf   get_global_cachez PersistentCache.get_global_cache"  s     6688$,=,E,E,G,G$I#$$ 	69_55L	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6G$$s   AA!$A!choicesList[ChoiceCaller]oprp   inputs	benchmark4Optional[Callable[[Any], Dict[ChoiceCaller, float]]]Dict[ChoiceCaller, float]c                J   t          j                    t          t          | j                  }t          t
          | j                  }t          t          | j                  }i ddfd}t          j        st          j	        r`t          j
        r|                                 ni }	 ||	          s1t                      r  ||                                 |	          s| 	  |          t          fd
D                       sJ |	                    i            |	                             i                               i                                            D ].\  }
}||	                           |
                                <   /n# t$          $ r} ||           |d}~ww xY w|                     |	           fdD             } ||           n-t                      r ||                                 |	           S )aG  
        Check to see if we have benchmarked the given choice callers. For each
        choice caller:

            1. Check global_cache[op][inputs][choice][precision], return benchmark if cached.
            2. Check local_cache[op][inputs][choice][precision], return benchmark if cached.
            3. If benchmark is not None:
                a. `max_autotune_gemm=True`: benchmark the choice, update
                    local_cache[op][inputs][choice], and return the benchmark.
                b. `max_autotune_gemm=False`: don't benchmark the choice, return nothing.
        Nr   r   callbackr   r`   rk   c                   d}D ]v}|                                 }||                     i                               i                               i           v r|                            |         	|<   td} |r ||           |S )z2Check if `cache` contains data for all the choicesTF)cached)hash_keyget)
r   r   hitchoicechoice_hashr   r   r   	precisiontimingss
        rf   check_cachez+PersistentCache.lookup.<locals>.check_cacheF  s    C!  $oo//%))B"3"3"7"7"C"C"G"G	SU"V"VVV&+Bi&7	&B;&OGFOO  C %$$$$Jrh   )r   c              3      K   | ]}|v V  	d S rc   rd   .0r   r   s     rf   	<genexpr>z)PersistentCache.lookup.<locals>.<genexpr>d  s(      GGVv0GGGGGGrh   c                F    i | ]}|                                 |         S rd   )r   r   s     rf   
<dictcomp>z*PersistentCache.lookup.<locals>.<dictcomp>p  s6     " " ";AFOO%%wv" " "rh   rc   )r   r   r   r   r`   rk   )rz   get_float32_matmul_precisionr
   r[   r   r\   rZ   r$   max_autotunemax_autotune_gemmautotune_local_cacher   r]   r   allr   itemsr   r   r   )r   r   r   r   r   	log_statslog_vals
log_errorsr   r   r   timingetimings_to_logr   r   s    ```          @@rf   r   zPersistentCache.lookup+  s   $ 688	2DKVYWW	0$+r69UU#T["fi
 

 	 	 	 	 	 	 	 	 	 	 	    	E&":  	E4:4OW$..000UWK  K,,) %&&) $D$9$9$;$;iPPP	) )'i00GGGGGwGGGGGGGG**2r222O..vr::EEiQSTTT*1--// W WPVB/	:6??;L;LMMW#   JqMMMG
 ''444" " " "EL" " " ((( 	EK--//)DDDD s   6B1F( (
G2F??Gr   )
r   r   r   rp   r   rp   r   r   r`   r   )r   r   r   r   r   r   r   rd   rh   rf   r   r   !  sZ        Y% % % %N N N N N Nrh   r   c                     t           j                            t                      d          } t           j                            |           st          j        | d           | S )NlocksTrx   )r   r   r   rA   existsr   )lock_dirs    rf   get_lock_dirr  |  sK    w||IKK11H7>>(## -
Ht,,,,Orh   databytesc                    t          j        t          j        |                                                     d d                             d                                          S )N3   r   )base64	b32encoder   r   digestdecodelower)r  s    rf   sha256_hashr    sL    GN4007799::3B3?FFwOOUUWWWrh   ru   codeUnion[str, bytes]extrac                    t          | t                    r| n|                     d          }|dk    r|dz   |                    d          z   }dt          |          z   S )Nr   ru   s   ||c)
isinstancer  r   r  )r  r  hashing_strs      rf   	code_hashr    s]    $T511K$$t{{77K7KK{{!E)ELL,A,AA[))))rh   basename	extensionspecified_dirTuple[str, str, str]c                `   |rOt           j                            |          r|}nat           j                            t	                      |          }n4t           j                            t	                      | dd                   }t           j                            ||  d|           }| ||fS )Nr+      rt   )r   r   isabsr   rA   )r  r   r!  subdirr   s        rf   get_pathr'    s      :7=='' 	>"FFW\\)++}==FFikk8AaC=997<<8 9 9i 9 9::DVT!!rh   content	hash_typec                    |dk    rt          | |          S |dv rt          t          |                     S t          d|           )Nr  )cubinhsacospvzUnknown hash type )r  reprr   )r(  r  r)  s      rf   get_hashr/    sV     F%(((---g'''
9i99
:
::rh   Tuple[str, str]c                    t          |                                 ||          }t          |||          \  }}}|dk    }	t          j                            |          st          || d           ||fS )Nr  Tr   )r/  stripr'  r   r   r
  r   )
r(  r   r  r)  r!  r   r  r&  r   encode_utf_8s
             rf   writer4    st     	::C%c9mDDHfd"f,L7>>$ 4T7d3333T>rh   textc                .    t          | d          d         S )zT
    Write the `text` to a file and return the path computed based on the hash.
    txtr+   r4  )r5  s    rf   
write_textr9    s     ua  rh   Fpath_r   r3  c                   t          |t          t          f          s
J d            t          |           }|r|j                            dd           |j        dt          j                     dt          j	                     dz  }t          |t                    rdnd}|
                    ||rdnd 	          5 }|                    |           d d d            n# 1 swxY w Y   |                    |           d S )
Nz6Only strings and byte arrays can be saved in the cacheTparentsry   rt   z.tmpwwbr   )encoding)r  rp   r  r   parentmkdirr   getpid	threading	get_identr   r4  rename)r:  r(  r   r3  r   tmp_path
write_modefs           rf   r   r     sN    #u  @ @?@ @ @ ;;D 7$666{JJJy/B/D/DJJJJH"7C00:dJ	z|,MGG	N	N RS	              OODs   ;CC!$C!c                  (    e Zd ZU dZded<   ded<   dS )TensorMetadataAndValueszk
    TensorMetadata plus the elements as a list of raw values.
    Used for hashing inlined constants.
    rM   tensor_metadata	List[Any]valuesNr   r   r   __doc____annotations__rd   rh   rf   rK  rK    s6          
 $###rh   rK  xc                    | S rc   rd   rR  s    rf   _identrU    s    Hrh   
device_map Dict[torch.device, torch.device]tr    rM   c                    t          |          }t          |d          st          j        |dd          }|j        | vr|j        | |j        <   t          j        || |j                           }|S )zs
    Extracts the tensor metadata and removes fields of the TensorMetadata
    that are not needed for caching
    _is_inductor_staticr   N)storage_offsetstorage_bytes)r   )rK   hasattrdataclassesr}   r   )rV  rX  metas      rf   %extract_tensor_metadata_for_cache_keyr`    sz     #1%%D1+,, O"4NNN {*$$"&+
4;tJt{,CDDDDKrh   .Tuple[Callable[[T], T], Tuple[TensorMetadata]]c                6    t          | |          }t          |ffS )zH
    See FxGraphCachePickler. Custom reducer to pickle FakeTensors.
    )r`  rU  )rV  rX  metadatas      rf   _reduce_fake_tensorrd    s      5ZCCHXK  rh   7Tuple[Callable[[T], T], Tuple[TensorMetadataAndValues]]c                "   |j         rt          d          t                      }|                                }t                      |z
  }|dk    rt	          j        d|dd           t          | |          }t          t          ||          ffS )a4  
    See FxGraphCachePickler. Custom reducer to pickle Tensors.
    If we see tensors, we know they're constants stored as attributes on
    the GraphModule. Include the values in the key calculation. Small
    tensors will be inlined, so we can't serve the same cache entry for
    different values anyway. Large constants are treated as parameters,
    so we could conceivably reuse a cache entry. To do that, however,
    PyCodeCache would need more complexity to create a new module from its
    cache, but with the right constants attached as attributes.
    zmkldnn tensors unpickleable.g      ?z1FX graph cache handling of a large constant took z.1zs. Please file an issue.)		is_mkldnnBypassFxGraphCacher   tolistwarningswarnr`  rU  rK  )rV  rX  startrN  elapsedrc  s         rf   _reduce_tensorrn    s     	{ A !!?@@@ FFEXXZZFffunG}}ddddd	
 	
 	
 5ZCCH,Xv>>@AArh   sr   #Tuple[Callable[[T], T], Tuple[str]]c                0    t           t          |           ffS )zD
    See FxGraphCachePickler. Custom reducer to pickle SymInts.
    )rU  rp   ro  s    rf   _reduce_symintrs  (  s     SVVIrh   r   c                     t          d          )z
    See FxGraphCachePickler. Custom reducer to handle any objects that we don't
    support and therefore raise to bypass caching.
    zReduce unsupported.)rh  rr  s    rf   _reduce_unsupportedru  2  s    
 2
3
33rh   c                  <   e Zd ZU dZi Zded<   ej                                        Z e	j
        ee          ee<    e	j
        ee          eej        <   eeej        <   eeej        j        j        j        <   edd            Zedd
            Zedd            ZdS )FxGraphCachePicklera:  
    Custom pickler to customize the pickling of some objects (Tensors), only for the
    purpose of computing a hash for keying into the FxGraphCache. Tensors contain
    objects that don't pickle and/or vary between runs, and we want to capture the
    data that allow us to compute a stable, but safe hash.
    rW  _device_mapobjr   r`   r  c                `   t          j                    5 } | |          }d|_        	 |                    |           nE# t          t
          f$ r1}t                              dd           t          d          |d}~ww xY w|	                                cddd           S # 1 swxY w Y   dS )zA
        Pickle an object using the FxGraphCachePickler.
        TzCan't pickleexc_infoz#Config options may be unpickleable.N)
ioBytesIOfastdump	TypeErrorAttributeErrorlogwarningrh  getvalue)clsry  streampicklerr  s        rf   r   zFxGraphCachePickler.dumpsN  s   
 Z\\ 	%Vc&kkGGLWS!!!!~. W W W NT:::()NOOUVV	W
 ??$$	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%s1   B#>B#B ,A;;B  B##B'*B'rp   c                J    |                      |          }t          |          S )zt
        Serialize an object using the FxGraphCachePickler and return a hash
        of the pickled object.
        )r   r  )r  ry  serialized_datas      rf   r/  zFxGraphCachePickler.get_hash`  s!     ))C..?+++rh   inpFxGraphHashDetails	List[str]c                    d
 fd}g }t          |                                          D ]1\  }}t          |t                    rkt	          t          |                    D ]M}                     ||                   }|                    d| d| d| d |||                               Nt          |t                    rZ|                                D ]D\  }}	                     |	          }|                    d| d| d| d ||	                      E                     |          }|                    d| d| d	 ||                      3|S )z
        Get a printable string describing in more detail all the attributes
        comprising an object. Useful for debugging when one graph hashes
        to a different value than another.
        ry  r   r`   rp   c                `   t          | t          j                  r"t          t	          j        |                     S t          | t                    rdS t          |           j        v r6t           j        t          |                    |           d                   S t          |           S )Nz<bytes>r+   )	r  rz   r    rp   r`  rx  r  typedispatch_table)ry  r  s    rf   get_strz0FxGraphCachePickler.debug_lines.<locals>.get_strq  s    #u|,,  @RUVVWWWC''   ycc00083-d3ii8==a@AAA3xxrh   [z] z]: z: ry  r   r`   rp   )	varsr  r  listrangelenr/  appenddict)
r  r  r  linesattrry  iihkvs
   `         rf   debug_lineszFxGraphCachePickler.debug_linesi  s   		  		  		  		  		  		  c** 	> 	>ID##t$$ 
>C// L LBSW--ALL!JQ!J!J$!J!J!J!JB8H8H!J!JKKKKL C&& >IIKK E EDAqQALL!CQ!C!C$!C!C!C!Cwwqzz!C!CDDDDE LL%%<<<d<<ggcll<<====rh   N)ry  r   r`   r  r  )r  r  r`   r  )r   r   r   rP  rx  rQ  copyregr  r   r   r
   rd  rL   rn  rz   r    rs  r   ru  fxexperimental_backward_stateBackwardStateclassmethodr   r/  r  rd   rh   rf   rw  rw  :  s          57K6666+0022N!2!23F!T!TN:#49#4^[#Q#QN5< #1N5<  	 -; % % % [%" , , , [,    [  rh   rw  rootsList[str] | Noneprefixhasherhashlib._Hashc                   t          t          j        | |          d           D ]}|j                            |j        d           }|J |j        }|J t          |d          5 }|                    |j        	                    d                     |                    |
                                           d d d            n# 1 swxY w Y   |j        rt          |j        |j         d|           d S )Nc                    | j         S rc   )ro   rT  s    rf   <lambda>z!build_code_hash.<locals>.<lambda>  s     rh   r   rbr   rt   )sortedpkgutiliter_modulesmodule_finder	find_specro   originr   updater   readispkgbuild_code_hashsubmodule_search_locations)r  r  r  libspecmodulerI  s          rf   r  r    sA    g*5&99?O?OPPP 
V 
V **38T::!!!&$ 	$1MM$)**733444MM!&&((###	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 9 	VD;	___fUUU
V 
Vs   'ACC	C	c                     t          j                    sd
d}  | t                    S ddlm} |                    d                                                              d	          S )zS
    Compute a key that contains relevant information about torch source files
    rootrp   r`   r  c                (   d}t           j                            t                    fd|D             }t	          j                    }|                    t          j        	                    d                     t          | gd|           |D ]p}t           j                            |          rOt          |d          5 }|                    |                                           d d d            n# 1 swxY w Y   q|                                S )N)z"codegen/aoti_runtime/interface.cppz'codegen/aoti_runtime/implementation.cppcodegen/cpp_prefix.h	script.ldc                P    g | ]"}t           j                            |          #S rd   )r   r   r   )r   rR  inductor_roots     rf   
<listcomp>z4torch_key.<locals>.get_code_hash.<locals>.<listcomp>  s)    OOOa27<<q99OOOrh   r   ru   r  )r   r   dirname__file__r   r   r  rz   __version__r   r  r
  r   r  r  )r  extra_filesr  r   rI  r  s        @rf   get_code_hashz torch_key.<locals>.get_code_hash  s,   K GOOH55MOOOO;OOOK^%%FMM%+227;;<<<TFB///# 0 07>>$'' 0dD)) 0Qaffhh///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0==??"s    (C44C8	;C8	r   parutilztorch/src_hash.txtascii)r  rp   r`   r  )r$   	is_fbcode_TORCH_PATHlibfb.pyr  get_file_contentsrstripr   )r  r  s     rf   	torch_keyr    s|    
  *	# 	# 	# 	#* }[)))      $$%9::AACCJJ7SSSrh   c                 J    t           j                            t                    S rc   )r   r   r  r  rd   rh   rf   get_inductor_rootr    s    7??8$$$rh   c                      e Zd ZU dZded<   dS )OrderedSetHolderzb
    See FxGraphHashDetails. Holds a sorted list to support stable hashing
    of set kwargs.
    rM  r  NrO  rd   rh   rf   r  r    s*          
 rh   r  c                      e Zd ZdZdS )rh  zI
    Exception to indicate that the FxGraphCache should be bypassed.
    N)r   r   r   rP  rd   rh   rf   rh  rh    s           rh   rh  c                  (    e Zd ZdZdgZddZddZdS )r  zz
    Object to capture all the details for a compiled FX graph relevant to computing
    a safe and stable cache key.
    graph_idgmtorch.fx.GraphModuleexample_inputsList[torch.Tensor]	fx_kwargsr   inputs_to_checkSequence[int]r`   ra   c                   || _         || _        i | _        t          |          D ]b}|| j        vrWt          ||                   t          u r+t          t          ||                             | j        |<   R||         | j        |<   c|| _        t          j
                    t          j                    t          j        j        j        f| _        t          j        j        j        j        t          j        j        j        j        t          j        j        j        j        f| _        t/                      | _        t2                                          | _        t9          j                    | _        d S rc   )r  r  r  r  EXCLUDED_KWARGSr  setr  r  rz   $are_deterministic_algorithms_enabled-is_deterministic_algorithms_warn_only_enabledutilsdeterministicfill_uninitialized_memory!deterministic_algorithms_settingsbackendsr|   matmul
allow_tf32&allow_fp16_reduced_precision_reduction&allow_bf16_reduced_precision_reductioncuda_matmul_settingsr  torch_versionr   r   system_infor$   save_config_portableinductor_config)r   r  r  r  r  r  s         rf   r   zFxGraphHashDetails.__init__  s6    , 	"" 	5 	5A,,,	!%%,, )9	!9M9M(N(NDN1%%(1!DN1%  / 688?AAK%?2
. N&1N&MN&M%
! '[[$//11%:<<rh   r  c                6    t                               |           S )z
        Get a printable string describing in more detail all the attributes
        comprising this object. Useful for debugging when one graph hashes
        to a different value than another.
        )rw  r  r   s    rf   r  zFxGraphHashDetails.debug_lines  s     #..t444rh   N)
r  r  r  r  r  r   r  r  r`   ra   r`   r  )r   r   r   rP  r  r   r  rd   rh   rf   r  r    sP          "lO)= )= )= )=V5 5 5 5 5 5rh   r  r  r  r  r  r  r   r  r  Tuple[str, List[str]]c                    t          | |||          }dt                              |          z   }|                                }d                    |          }t
                              d| d|            ||fS )z=
    Generate a unique hash of the FX graph for caching.
    rI  
z$FX graph cache hash details for key z:
)r  rw  r/  r  r   r  debug)r  r  r  r  detailsr   r  	debug_strs           rf   compiled_fx_graph_hashr    s     !^YPPG #,,W55
5C%%''K		+&&IIIHSHHYHHIIIrh   rM  compiled_graphCompiledFxGraph
cudagraphsrE   c                2   |j         J |j        J |j        }|j        }|j        }|j        }|j        d         }|j        d         }|s|j        }	|	d         }
|j        }|j        }t          j	        j
        s.| D ]+}t          |t          j                  rt          |           ,|8|s6|s4|                    t!          t#          |j                                       ddlm} |j         }|J  |||
t!          t#          |j                            |||t+          |j                                                  |t+          |j                  	  	        |_         dS t3          j        |           |rbt          j	        j
        rQ|J |j        J |j         t          j        j
                            |j        d	          J dfd}||_         d|j        v r1|j        rtA          |j                   dS tA          d|            dS dS )z
    Checks for any reasons not to run cudagraphs and then
    runs it on compiled_graph.
    Mutates the `compiled_graph.current_callable` and `cudagraphs`
    Nis_inferenceis_backwardstatic_input_idxsr+   )cudagraphify)r  device_indexstack_tracesr  r  	constantsplaceholdersmutated_input_idxsF)create_if_none_exists
new_inputsrM  r`   Callable[..., Any]c                B                                       |           S rc   )set_to_running_backward)r  compiled_graph_callablemanagers    rf   compiled_artifactz1cudagraph_post_compile.<locals>.compiled_artifactj  s&    //111..z:::rh   r|   skipping cudagraphs due to )r  rM  r`   r  )!current_callablecudagraph_infocudagraph_fail_reasonsr  boxed_forward_device_indexr  r  r	  r$   r   cudagraph_treesr  rz   r   intr  nextiterdevice_idxs
compile_fxr  tupler
  rN  r  rE   disabler   	_inductorget_managerdevice_typesdisabled_cudagraphs_reasonr=   )r  r   r  cached_infor  r  r  r  r  r  r  r  r	  rX  r  r  r  r  r  s                    @@rf   cudagraph_post_compiler'  #  s    *666(444 /K(?$4O!/!J!+N;L *=9K! D",	%&9:"/"/}, 	#  a.. FFF '2  3 3 '**4^5O0P0P+Q+QRRR,,,,,,):+++*6,/d>#=>>??%#%N4;;==>>%$^%FGG
+
 
+
 
+
''' 	*%%%
  	@6=8 	@-999-3???&4&E#o5AA*0 B  G &&&; ; ; ; ; ; ; /@N+^000 8 3"=     4J2HJJ     10rh   ran_cudagraphsc                n    | s0|j         J t          |j         |          }||j         ur||_         dS dS dS )z
    Realigns input strides from inputs_to_check if
    we didn't end up running cudagraphs. Mutates
    `compiled_graph.current_callable` if cudagraphs
    was run. Otherwise, does nothing.
    N)r  rD   )r(  r   r  new_callables       rf   maybe_realign_inputsr+  }  sb      ;.:::3+_
 
 ~>>>.:N+++; ;
 ?>rh   time_saved_nsr  c                   t           j                                        rt           j                                        sdS t	          | dz            }t          j                    rSt           j                            d          }t          
                    d||           |t	          ||z  dz            z  }t          
                    d|           t          j                            t          |                     |S )z}
    Ephemerally increases the NCCL timeout when compiling for a distributed job
    Returns amount of seconds increased
    r   g    eAz>pytorch/remote_cache:ephemeral_timeout_fudge_factor_percentagezNEphemeral NCCL timeout increase fudge factor %d and original increase value %dd   zIncreasing NCCL timeout by %d)seconds)rz   distributedis_availableis_initializedr  r$   r  _utils_internaljustknobs_getval_intr  infodistdistributed_c10d"_add_ephemeral_timeout_for_all_pgsr	   )r,  increased_timeout_secfudge_factors      rf   .add_ephemeral_timeout_increase_for_distributedr;    s    
 ))++ 53D3S3S3U3U q 455 	Q,AAL
 
 	\!	
 	
 	

 	%:\%IC%O!P!PPHH,.CDDD<</000   ! rh   c                     e Zd ZdZed)d            Zed*d            Zed+d
            Zed,d            Zed-d            Z	ed.d            Z
ed/d            Zed0d            Zed1d&            Zed2d'            Zd(S )3FxGraphCachea7  
    Supports caching and reusing compiled Fx graphs.

    The overall strategy is as follows:
    - This cache stores entries on disk. When saving an entry, we can't
      serialize callables (that could be C++, Triton, etc.), so we serialize
      their own disk cache location. We then recreate the compiled artifact
      after fetching from disk.
    - For indexing the cache, we gather the fields relevant to identifying an
      FxGraph (the graph module, graph inputs, system settings etc.) into an
      FxGraphCacheDetails object, pickle it, and compute a hash for the key.
      See FxGraphCachePickler.
    - Among the metadata we store, we also include a guards expression that's
      appropriate for validating any symbols for Tensor arguments that have
      symbolic bounds. On cache lookup then, we evaluate those guards in the
      current context to validate that a cached entry can be served.
    - A given graph could have multiple compiled versions, corresponding to
      different sets of guards. Therefore, we store cache entries in the form:
          <temp dir>/<fx graph hash>/<serialized metatdata>
    - On lookup, we compute the key from the graph details, iterate over all
      leaf files in the corresponding subdirectory, deserialize the entry, and
      evaluate its guards expression. If the evaluation succeeds, we have a
      cache hit. If it fails, we compile the graph and store a new entry.
    - Finally, on a cache hit, we need to make sure any guards that would
      have been created during compilation are added to the current context.
    r`   rp   c                 Z    t           j                            t                      d          S )zS
        Get the toplevel temporary directory for storing compiled graphs.
        fxgraph)r   r   r   rA   rd   rh   rf   _get_tmp_dirzFxGraphCache._get_tmp_dir  s    
 w||IKK333rh   r   c                    t           j                            t                                          | dd         |           S )zA
        Return the disk location for a given cache key.
        r+   r$  )r   r   r   r=  r@  r  s    rf   _get_tmp_dir_for_keyz!FxGraphCache._get_tmp_dir_for_key  s0    
 w||L5577QqS3GGGrh   r   rM  List[torch.SymInt]c                    d | D             S )z
        Get the backed SymInt objects from the input list. Note that we can never
        have guards that depend on unbacked symint.
        c                d    g | ]-}t          |t          j                  t          |          +|.S rd   )r  rz   r   rN   r   ro  s     rf   r  z7FxGraphCache._filter_backed_symints.<locals>.<listcomp>  s4    QQQaZ5<%@%@QXa[[QQQQrh   rd   )r   s    rf   _filter_backed_symintsz#FxGraphCache._filter_backed_symints  s     RQ6QQQQrh   Optional[ShapeEnv]c                 h    t           j        j                                        } | sdS | j        j        S )zG
        Helper to get the shape env from the tracing context.
        N)rz   _guardsTracingContexttry_get	fake_mode	shape_env)ctxs    rf   _get_shape_envzFxGraphCache._get_shape_env  s2    
 m*2244 	4}&&rh   r  r  localrk   remote_cache!Optional[RemoteCache[JsonDataTy]]Optional[CompiledFxGraph]c                |    t                                           }|J t                               |          }d |D             }d fd}d} |            D ]^}	|	j        s|	} nRt	          |                    |	j        |                    }
t                              d |	j        ||
           |
r|	} n_|dS t          |j	        d          d         |j
        t          j                                      st          d	         d
xx         dz  cc<   t          t          j                                                                    dd           t%                      }t          j                            |          v rB|v rn=dt          j                            |           d}t)          j        |d| d          t-          d           	 t.                              |j	        |j        |j                  j        |_        n,# t:          $ r t                              d           Y dS w xY w|j        rOt	          |                    |j        |                    }|du sJ t                              d |j                   t@          j!        "                    |j#                   t          d	xx         |j$        z  cc<   ddl%m&}  |j'                   tP                              d           tP                              d           tS          dfdfd           |S )z
        Lookup a compiled graph in the cache by key. On a hit, return the
        deserialized CompiledFxGraph object. On a miss, return None.
        Nc                ,    g | ]}t          |          S rd   )rO   rF  s     rf   r  z.FxGraphCache._lookup_graph.<locals>.<listcomp>  s    ...!...rh   r`   &Generator[CompiledFxGraph, None, None]c               3  @  K   rt                                         } t          j                            |           rt          t          j        |                     D ]}	 t          t          j                            | |          d          5 }t          j
        |          V  d d d            n# 1 swxY w Y   `# t          $ r t                              dd           Y w xY wr	                               x}it          |t                     sJ |d         }t          |t"          t$          f          sJ t'          j        |          }t          j        |          V  d S d S # t          $ r  t                              dd           Y d S w xY wd S )Nr  z,fx graph cache unable to load compiled graphTr{  r  )r=  rB  r   r   r
  r  listdirr   r   pickler   	Exceptionr  r  r   r  r  rp   r  r  	b64decodeloads)	r&  r   rI  
cache_datar  r(  r   rQ  rR  s	         rf   iterate_over_candidatesz;FxGraphCache._lookup_graph.<locals>.iterate_over_candidates  s      %::3??7>>&)) 	 &rz&'9'9 : :  !%bgll64&@&@$!G!G 51&,k!nn 4 4 45 5 5 5 5 5 5 5 5 5 5 5 5 5 5(   KK N)- (       
&2&6&6s&;&;;
H)*d;;;;;)&1)$e====="("24"8"8$l73333333 IH !   KKFQU         sI   #.C B4(C 4B8	8C ;B8	<C  &C)(C)/A>E1 1&FFzEfx graph cache key %s evaluating guards [%s] with values %s => hit=%srv      inductorfxgraph_lookup_write_filer+   Tr<  z#include\s*"[^"]+"
#include "r   z"Failed to load cached artifact: %sz*fx graph cache key %s post-load guards: %srR   Output code written to: %szOutput code: 
%sinductor_output_codec                     d iS )Nfilenamerd   )artifact_paths   rf   r  z,FxGraphCache._lookup_graph.<locals>.<lambda>n  s    Z/ rh   c                      S rc   rd   )r  s   rf   r  z,FxGraphCache._lookup_graph.<locals>.<lambda>o  s    t rh   
payload_fn)r`   rW  )*r=  rP  rG  guards_exprrk   evaluate_guards_expressionr  r  r'  	cache_keysource_coder   r   r
  r!   r   r  rB  cpp_prefix_pathr  resubr   PyCodeCacheload_by_key_pathcache_linemapr
  callr  OSErrorerrorguardsr&   CachedMetricsHelperapply_deltasmetrics_deltascounter_deltasgraphrS   save_output_codeoutput_code_logrJ   )r   r  rQ  rR  rN  symintshintsr_  r  	candidater   cpp_pppatterncheckrS   ri  r  s   ` ``           @@rf   _lookup_graphzFxGraphCache._lookup_graph  s    !//11	$$$55nEE..g...	 	 	 	 	 	 	 	: 0022 	 	I( ! 44Y5JERR C IIW%    ! =4 !$77: w~~m,, 	>Z !<===B===//0066td6SSS$&&Fw''4//T>> O273C3CF3K3KNNNG6'+A+A+A+A4HHD====	%0%A%A#	& &
  ""  	 	 	 II:MJJJ44		  	44U5FPP E D====II<c9CS   	#001EFFF 44((((((&&t,,,:MJJJ14888"////#|||	
 	
 	
 	

 s   !6H %I Ir   r  r  rE   c                ,   t          ||            |ri| j        rQd| j        v rt          d| j                    nt          d         dxx         dz  cc<   t          j        |           nt          || |           | j        }t          || |           | S )a  
        Run a set of post processing steps after loading from the cache. These involve:
         - Setting the tracing context output strides
         - Running cudagraphs if enabled
         - Realigning inputs

        This runs whether or not we have a cache hit, and always runs directly after we get a CompiledFxGraph.
        The results of this function are *not* saved in the cache itself.
        r|   r  ra  cudagraph_skipsr+   )
rI   r%  r$  r=   r!   rE   r!  r'  r  r+  )r   r  r  r  s       rf   post_compilezFxGraphCache.post_compiles  s     	+>>JJJ 	 8 ^8887an6_aa    Z():;;;q@;;;!*----&""  
 )8 		
 	
 	
 rh   ra   c                *   t          |          }d|_        t                                          }|J t                              |          }|                    |          }|                    ||          |_        	 t          j	        |          }	nH# t          $ r; t                              dd           t          d         dxx         dz  cc<   Y dS w xY w	 |rt                              |           }
t          j                            |
          st          j        |
d	           t          j                            |
t)          |	                    }t+          ||	d
           |r[t-          |j        pddz            }t1          j        |	                              d          |d}|                    | |           dS dS # t          $ r; t                              dd           t          d         dxx         dz  cc<   Y dS w xY w)z=
        Store a serialized CompiledFxGraph on disk.
        N)r  rz  z1fx graph cache unable to serialize compiled graphTr{  ra  fxgraph_cache_pickle_errorr+   rx   r   r   g    .Ar  )r  time_taken_msz!fx graph unable to write to cachefxgraph_cache_write_error)r   r  r=  rP  rG  get_pruned_guardsproduce_guards_expressionrm  rZ  r   r[  r  r  r!   rB  r   r   r
  r   r   r  r   r  _time_taken_nsr  	b64encoder  put)r   r   r  rQ  rR  disk_compiled_graphrN  r  rz  r(  r&  r   r  r^  s                 rf   _save_graphzFxGraphCache._save_graph  s]    #>22
 04, !//11	$$$55nEE,,W55*3*M*M  +N +
 +
'	l#677GG 	 	 	KKCd     Z !=>>>!C>>>FF		C 	<%::3??w~~f-- 7K6666
 w||FK,@,@AAT7d;;;; 2 #%8%G%L1QT$T U U",W55<<WEE%2* *
   j111112 2  	C 	C 	CKK;dKKKKZ !<===B======	Cs'    B ACCC+G AHHr  r  c                   t           j        st           j        j        rt	          d          t
                                          )t                              d           t	          d          | j	        j
        D ]}t          |j        t          j        j                  rt	          d          |j        dk    rAt          t#          | |j                  t          j        j                  rt	          d          dS )z
        Check some conditions that would preclude caching and raise BypassFxGraphCache
        to bypass in case caching is not possible.
        z@Freezing may introduce constants that aren't static across runs.Nzfx graph cache no shape envzNo shape env.z!Can't cache HigherOrderOperators.getattrzCan't cache torchbind objects.)r$   freezingaot_inductoruse_runtime_constant_foldingrh  r=  rP  r  r  r  nodesr  targetrz   _opsHigherOrderOperatorr   r  _CScriptObject)r  nodes     rf   _check_can_cachezFxGraphCache._check_can_cache  s     ? 	f1N 	$R   &&((0II3444$_555
 HN 	K 	KD$+uz'EFF N()LMMMw)##
DK((%(*?) )# ))IJJJ	K 	Krh   compile_fx_fnr  r  r   r  r  remotec                b   |s|s
J d            d}d}d}	i 	 t                               |           t          ||||          \  }
}|
d<   |d<   d}|rd}	 t          j                    rddlm}  ||          }nddlm}  ||          }n]# t          $ r'}d}t                              d	|           Y d}~n1d}~wt          $ r! d}t                              d
d           Y nw xY wt                               |
|||          }|t                              d|
           t          d         dxx         dz  cc<   d}t!                      }|}	 | ||||          }t!                      |z
  |_        |j        d<   t                               |
||||           nnt                              d|
           t          d         dxx         dz  cc<   d}t!                      }	|j        x}|d<   t'          |          x}dk    r|d<   |
|_        n# t*          $ r}t          d         dxx         dz  cc<   d}t                              d|           t/          |          d<   |rt1          dt/          |                     t!                      }	Y d}~nd}~ww xY w|s | ||||          }|J |d<   t3                      }|                    d| |	           t6          j                            d d! fd"#           t                               |||d$                    |S )%z
        Load a compiled graph from the cache. If a cached entry does not exist,
        compile the graph and save it to the cache.
        z(at least one of them needs to be enabledNr   
componentszfx-graph-v1r   )FbRemoteFxGraphCache)RemoteFxGraphCachez#Unable to create a remote cache: %szUnable to create a remote cacheTr{  zfx graph cache miss for key %sra  fxgraph_cache_missr+   misstime_taken_nszfx graph cache hit for key %sfxgraph_cache_hitr   r,  ephemeral_timeout_increasefxgraph_cache_bypassbypassz%Bypassing FX Graph Cache because '%s'cache_bypass_reasonbypass_fx_graphcache_statefx_graph_cache_)rc  artifactc                     dddS )Nfx_graph_cache_hashr   )ro   r@  rd   rd   rh   rf   r  z#FxGraphCache.load.<locals>.<lambda>e  s    -"! ! rh   c                 ,    t          j                   S rc   )r   r   )
cache_infos   rf   r  z#FxGraphCache.load.<locals>.<lambda>i  s    tz*55 rh   )metadata_fnrl  r  )r=  r  r  r$   r  torch._inductor.fb.remote_cacher  torch._inductor.remote_cacher  r   r  r  r[  r  r  r!   r   r  r  r;  _fx_graph_cache_keyrh  r5  rp   r*   r#   log_instant_eventrz   _loggingrJ   r  )r  r  r  r  r  rQ  r  r   r  cache_event_timer   r  rR  cache_idr  r  r  
start_timer,  ephemeral_increasechromium_logr  s                        @rf   r   zFxGraphCache.load  sr    JJJ JJJJ%'
G	)))"---5NI   C !$Ju'2J|$>BL R(R')) DXXXXXX';';H'E'ESSSSSS'9'9('C'C* J J J#'LKK EqIIIIIIII  R R R#'LKK ADKQQQQQR *77^UL N %		:C@@@$%9:::a?:::$$YY
#- !." " 18		J0F-.<.K
?+((""     		93???$%8999Q>999##*99 %3%BBMO2?J/.\)/ / * 	 
 DV
#?@14N..! 	) 	) 	)Z !7888A=888"KHH<a@@@03AJ,- < !2CFF;;;&yy	)  	*]NOY N )))$/
=!022&&+k++-=
 	' 	
 	
 	
 	''  6555 	( 	
 	
 	
 	!!NIl,C	
 	
 	
 sO   ?H 6B H 
C*B<7H <+C*'H )C**D,H 
J"!A7JJ"c                     	 t          j        t                                                     dS # t          $ r Y dS w xY w)z.
        Clear out the on-disk cache.
        N)shutilrmtreer=  r@  FileNotFoundErrorrd   rh   rf   clearzFxGraphCache.clearq  sK    
	M,335566666  	 	 	DD	s   +/ 
==Nr`   rp   )r   rp   r`   rp   )r   rM  r`   rC  )r`   rH  )
r   rp   r  r  rQ  rk   rR  rS  r`   rT  )r   r  r  r  r  rE   r`   r  )r   rp   r   r  r  r  rQ  rk   rR  rS  r`   ra   )r  r  r`   ra   )r  r  r  r  r  r  r  r   r  r  rQ  rk   r  rk   r   )r   r   r   rP  r   r@  rB  rG  rP  r  r  r  r  r   r  rd   rh   rf   r=  r=    sz        : 4 4 4 \4 H H H \H R R R \R ' ' ' \' E E E \EN + + + \+Z <C <C <C \<C| K K K \K8 q q q \qf    \  rh   r=  r   _StrideExprStrc                  .   e Zd ZU dZded<   ded<    ej        d          Zded<   d	ed
<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   d ed!<   d"ed#<   d$ed%<   d&Zd'ed(<   d&Z	d)ed*<   d&Z
ded+<   d6d1Zd7d5Zd&S )8r  zr
    Class holding a compiled FX graph. This is the object serialized on disk
    to support FxGraph caching.
    Optional[Callable[..., Any]]r  rp   ro  F)r.  rp  Optional[List[Tuple[int, str]]]rv  zSet[str]r$  zSet[int]r  mutated_inputsr  zDict[str, torch.Tensor]r
  z Dict[str, torch._C.ScriptObject]torchbind_constantsz4Optional[List[Optional[Tuple[_StrideExprStr, ...]]]]output_stridesOptional[str]r%  metrics.CachedMetricsDeltasr}  Counter[str]r~  rm  zOptional[CudagraphCachedInfo]r  r   r  r  r  zOptional[BoxedDeviceIndex]r  NzOptional[int]r  zOptional[bool]_boxed_callr  r  rS   *List[Optional[Tuple[_StrideExprStr, ...]]]r`   ra   c                R   || _         |j        | _        |j        rEt          |j                  5 }|                                | _        d d d            n# 1 swxY w Y   |j        | _        t          |j                  | _        t          |j	                  | _	        t          |j
                  | _
        t          |j                  | _        |j        | _        |j        | _        || _        || _        || _        || _        d | _        d | _        i | _        d| _        d | _        d S )Nrd   )r  ro  
cache_pathr   r  rp  rv  r  r$  r  r  r  r
  r  r  r%  r}  r~  rm  r  r  r  r  )r   r  r  r  r%  r}  r~  rI  s           rf   r   zCompiledFxGraph.__init__  sK    !1 	,e&'' ,1#$6688 , , , , , , , , , , , , , , ,"0 233u011!%"677"%e&>"?"?#(#< ,*D',,"!*.'''s   AAAr   rM  r   c                >    | j         J |                      |          S rc   )r  )r   r   s     rf   __call__zCompiledFxGraph.__call__  s%    $000$$V,,,rh   )r  r  r  rS   r  r  r%  r  r}  r  r~  r  r`   ra   )r   rM  r`   r   )r   r   r   rP  rQ  r^  fieldrp  r  r  r  r   r  rd   rh   rf   r  r    sw         
 3222NNN({(e444K44442222    &&&&9999HHHH----////     1111""""::::$(N(((("&K&&&&)-----/ / / /@- - - - - -rh   cmd_c                    t          j        |           }	 t          j        |           d S # t          j        $ r }t          j        ||j                  |d }~ww xY wrc   )shlexsplit
subprocess
check_callCalledProcessErrorr%   CppCompileErroroutput)r  cmdr  s      rf   run_command_and_checkr    sf    
+d

C8c"""""( 8 8 8!#qx00a78s   , AAAr   c                    |                      d          rt          j                            |           S |                      d          rt          j                            |           S | dfS )zDReturns the path where the AOT Inductor compiled kernels are stored..soz.pt2ru   )endswithr   r   r  )r   s    rf   split_aot_inductor_output_pathr    s]     }}U w}}T"""	v		 w}}T"""Rxrh   c                      e Zd ZU i Zded<    eej                  Zedd            Z	edd            Z
edd            ZdS )CudaKernelParamCachezDict[str, Dict[str, str]]r   r   rp   paramsDict[str, str]r+  bin_typer`   ra   c                    t          |||t          t          j        j                  d                   \  }}||t                      <   || j        |<   d S )Nr   )r)  r!  )r4  r  r$   r  output_pathr   r   )r  r   r  r+  r  rw   r   s          rf   r  zCudaKernelParamCache.set  sc    8#/ 	
 
 
4 59.001	#rh   Optional[Dict[str, str]]c                8    | j                             |d           S rc   )r   r   )r  r   s     rf   r   zCudaKernelParamCache.get  s    y}}S$'''rh   KeysView[str]c                4    | j                                         S rc   )r   r   )r  s    rf   get_keyszCudaKernelParamCache.get_keys  s    y~~rh   N)
r   rp   r  r  r+  rp   r  rp   r`   ra   )r   rp   r`   r  )r`   r  )r   r   r   r   rQ  r   r  cache_clearr  r  r   r  rd   rh   rf   r  r    s         ')E)))),u{++K      [  ( ( ( [(       [     rh   r  c                  &    e Zd Zedd
            ZdS )AotCodeCompilerr  rS   rp  rp   serialized_extern_kernel_nodesr  r|   rk   r`   c           
     h  01234567 t           j        dk    rt          d          t                       t	                      }t          ddt          ||j                            }t          |	                                          }d3d}t          j                    rHt          j                    6|sj        rt          j                    7d3d}nt          j                    7nd	6d
7t!          t          j        j                  \  }	}
t'          d||	          \  }5t(                              d5           t-          d5fdfd           t.          j                            t.          j                            5          d         |          2dE2367fd}dE2fd}ddlm} t;                      } |t.          j                            ||dz             t<                    }|5  |ret.          j                            5          d         dz   }tA          |d          5 }|                    |           d d d            n# 1 swxY w Y   |
rt          j        j        n't.          j                            5          d         dz   }t.          j                            5          d         dz   }tC          fd j"        #                                D                       1dFd&4tI          14fd'j"        %                                D                       }t          j                     o|d(k    }t          j        j&        rd}t          j        j'        rtQ          5          \  }}t          ||j        d||)          }t          |5||*          }|	                                }|)                                }t.          j                            5          d         d+z   }|*                    |           ntQ          5          \  }}t          ||j        d||)          }t          |5||*          }|	                                }|)                                }tV          ,                    d,|           3rat.          j                            5          d         dz   }t[          5||                                           t/          j.        |d-           nt_          |           dGd/0d0                    01fd1j"        #                                D                       }|s|}d}nuta          tb          te          j3        dte          j4        td          j5                  j6        d2          7                                          }tq          j9        d3|d4z   |          } ||d5t           j                 |          } t          j        j'        rutQ          |          \  }!}"t          ||j        |6          }#t          |!|| g|"|#*          }$|$	                                }%|$)                                }t.          j                            5          d         d7z   }&|#*                    |&           dd8l:m;}' |rt.          j                            5          d         d9z   }(tA          |(d:          5 })|)                    |           |)                    tq          j9        d;|                     d d d            n# 1 swxY w Y    |'t.          j                            5          d                   }*|*cd d d            S tQ          |          \  }!}"t          ||j        |6          }#t          |!|| g|"|#*          }$|$	                                }%|$)                                }tV          ,                    d<|%           3rv|
rt          j        j        n't.          j                            5          d         dz   }t[          || g||%                                           t/          j.        |d=           nt_          |%           |rdd l<}+|+=                                },tm          d>|,          }-tA          |d?          5 }.|.>                                }/|.                    d@|-|/|-z  z
  z             |.                    |           |.                    tq          j9        d;|                     d d d            n# 1 swxY w Y   tA          5dA          5 }|                    dB           |                    dC| dB           |                    dD|% dB           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |S )HNrW   z.AotCodeCompiler not yet supported for inductoroi)vec_isar|   aot_modero   sourcesBuildOptionFTldobjcopycpp)r  r!  re  
graph_dumpc                     dd dS )Ninductor_aot_coder  )ro   r  rh  rd   )
input_paths   rf   r  z)AotCodeCompiler.compile.<locals>.<lambda>3  s    +&  rh   c                      S rc   rd   )rp  s   rf   r  z)AotCodeCompiler.compile.<locals>.<lambda>8  s    { rh   rk  r   constsr  r`   rp   c           	        t          | d          \  }}t          j                            |          d         dz   }	r} dt          j                            |           dt          j                            |           }t          |||                                           t          j        |d           n d| d| }t          |           t          
                    d|           
j        t          
j                                                  z  r%t          |           d	k    rt!          d
          d}nd}t"          t"          dz
  z  dk    rt"          dk    s
J d             d| dt"           d| d| 	}t          
                    d|           t          |           d| }t          
                    d|           t          |           	r4t%          j        ddt          j                            |                    }nt%          j        dd|          }g }|                     d| d|            |                     d| d|            |                     d| d|            t          
                    dd                    |                     |D ]}t          |           |S )Nbinr!  r   .oz -r -b binary -o    zaot constant binary command: %s 5wzPModels with buffer mutation included doesn't support constants greater than 2GB!z .data=.ldataz1 .data=.lrodata,alloc,load,readonly,data,contentsr+   @   zmust be power of 2 and >= 64z --rename-sectionz --set-section-alignment .data=z'aot constant rename section command: %szrm z$aot constant bin removal command: %sz[\W]rw   z --redefine-sym _binary_z#_start=_binary_constants_bin_start z!_size=_binary_constants_bin_size z_end=_binary_constants_bin_end z'aot constant binary redefine symbol: %s)r4  r   r   splitextr  compile_filer  chmodr  r  r  mutated_buffersr  r
  r   r  
ValueErrorrC   rr  rs  r  r   )r  rw   consts_pathconsts_or  rename_databodysymbol_listconsts_specified_dirfbcode_aot_cpu_rer  
ld_commandobjcopy_commands           rf   _compile_consts_linuxz6AotCodeCompiler.compile.<locals>._compile_consts_linux@  sS   "2  NA{ w''44Q7$>H  +#rrbg6F6Fx6P6PrrSUSZScScdoSpSprr[(CIIKK@@@5))))#NNhNNNN%c***II7===$s5?+?+?+A+A'B'BB R v;;..$j   .
 R {Q/ "b(((*H(((" * ** *2=* * * *  (* *  II?EEE!#&&&%%%CII<cBBB!#&&&  9vgsBG,<,<[,I,IJJvgsK88K"ooDooemoo   "mmDmmckmm   "kkDkkaikk   II?+AVAVWWW" + +%c****Orh   c                   t           j        j        r0t          | d          \  }}t                              d|           t          |           dk    }d}|dz  }|dz  }|s| D ]}|d| d	z  }| s|d
z  }n|dz  }|dt          |           dz
   d	z  }|dz  }|dz  }t          |d          \  }}t          j        	                    |          d         dz   }t                       d| d| }t          |           |rt          |d          5 }	|	                    d           |	                    d          }
|
                    d          }|dk    sJ |	                    |           d}|t          |           k     r5|	                    | |d                    }||z  }|t          |           k     5d d d            n# 1 swxY w Y   |S )Nr  r  zbinary constants path: %si   z	.section	__DATA,__data
z%	.globl	__binary_constants_bin_start
z__binary_constants_bin_start:
z	.byte r  z
	.space 1
z	.quad 0x1234567899abcdef
z	.space    z".globl	__binary_constants_bin_end
z__binary_constants_bin_end:
Sr   r   -c -o r  zr+bs   ͫxV4r   )r$   r  debug_dump_consts_binr4  r  r  r  r   r   r  r7   r  r   seekr  find)r  rw   _binary_constants_pathis_large_consts
consts_asmr  r  r  r  rI  hdr	start_idxposrcr  s                 rf   _compile_consts_darwinz7AotCodeCompiler.compile.<locals>._compile_consts_darwin  sy   "8 O,1"6- - -))
 		57MNNN!&kkD0O6JDDJ;;J" 	> 3 3A"2Q"2"2"22JJ  1.0J<<
=#f++/====
@@J99J"2  NA{
 w''44Q7$>H%''HHHH;HHC!#&&& "(E** 
"aFF1III&&,,C #)L M MI$????FF9%%%CF++WWVCDD\22r	 F++
" 
" 
" 
" 
" 
" 
" 
" 
" 
" 
" 
" 
" 
" 
" Os   B'G		GGFileLock.locktimeoutz.jsonr>  r  r  c              3  ^   K   | ]'}|j         v                    |          j        V  (d S rc   )folded_constantsget_original_value_of_constantis_cuda)r   ro   r  s     rf   r   z*AotCodeCompiler.compile.<locals>.<genexpr>  sN        u555 44T::B5555 rh   tensortorch.Tensorall_cudark   r  c                    | j         r$t          j        j                            |           n%|                                                                 }|r|nt          |          S rc   )rg  rz   opsmkldnn_nbytesuntyped_storagenbytesr,   )r;  r=  n_bytess      rf   get_nbytes_of_tensorz5AotCodeCompiler.compile.<locals>.get_nbytes_of_tensor  s^     ';EI$,,V444//1188:: 
 #+?www?rh   c              3  H   K   | ]\  }}|j         v |          V  d S rc   )r8  )r   ro   r;  r=  rE  r  s      rf   r   z*AotCodeCompiler.compile.<locals>.<genexpr>  sN        "T6u555 %$VX665555 rh   r  )r  r|   r  compile_onlyuse_absolute_pathuse_mmap_weightsro   r  
output_dirr  z_compile_flags.jsonzaot compilation command: %sr  rX  c                *   dd}dd l }|                                 dk    rdS | j        rIt          j        j                            |           }t          j        j                            |           }nN|                                 	                                }|                                }|
                                }|                    ||                    |j        |z                      }t          |j                  }|r|n
 ||          S )N	raw_bytesr  r`   c                    |                      t          |           t          z   dz
  t          z  t          z  d          }|S )Nr+       )ljustr  rC   )rM  padded_bytess     rf   _pad_to_alignmentzEAotCodeCompiler.compile.<locals>._to_bytes.<locals>._pad_to_alignment  s=    #,??Y+59kIKW$ $L ('rh   r   rh   )rM  r  r`   r  )ctypesnumelrg  rz   r?  r@  data_ptrrA  rB  rr   rC  r   POINTERc_ubyter  contents)	rX  r=  rR  rS  rU  rC  t_cpu	raw_arrayrM  s	            rf   	_to_bytesz*AotCodeCompiler.compile.<locals>._to_bytes  s   ( ( ( ( 7799>>3; ,$y/88;;H"Y-55a88FF--//3355E$~~//H"\\^^F"KKNN6>F#:;; 	 ")"455	$,Nyy2C2CI2N2NNrh   rh   c              3  h   K   | ],}|j         v                     |                    V  -d S rc   )r8  r9  )r   ro   r[  r=  r  s     rf   r   z*AotCodeCompiler.compile.<locals>.<genexpr>4  sW       * *u555 	%>>tDDhOO5555* *rh   )r+   qqr$  )linuxdarwin)r  r|   r  rH  z_linker_flags.json)package_aotiz_serialized_weights.binr?  qzaot linkage command: %si  i @  za+b    ar  z// Compile cmd
// z// Link cmd
// )r  r  r`   rp   )r;  r<  r=  rk   r`   r  )rX  r<  r=  rk   r`   r  )?r~   platformr   r1   r:   r3   r5   r  r.  get_command_liner$   r  rX   r  objcopy_fallbackr  r  r  r  r4  r  r5  rJ   r   r   r   r  filelockr3  r  LOCK_TIMEOUTr  r   r   r
  r   sumr  force_mmap_weightspackager8   get_target_file_pathsave_flags_to_filer  r  r  r  r  r   r  rz   randintiinfoint64maxitemstructpacktorch._inductor.packager`  resourcegetpagesizetell)8r  r  rp  r  r|   picked_vec_isavec_isa_cmd_gencpp_commandrH  specified_output_pathspecified_so_namer   r"  r1  r3  r  lockoutput_jsonrI  	output_sooutput_oconsts_sizerI  object_output_nameobject_output_dirobject_build_optionsobject_buildercompile_cmdcompile_flagsserialized_weightsaot_constantsmagic_numberr  output_namerK  so_build_options
so_builderlink_cmdlinker_flagsr`  weight_file	f_weightsarchive_pathrv  
page_size_	page_sizef_soso_sizer[  r=  r  r  rE  r	  r   r!  s8    ``                                             @@@@@@@@rf   compilezAotCodeCompiler.compile  sh    <7""OPPP%$+&  
 
 
 ?;;==>>!! 
	($))J 8EN 8"-">"@"@$(!$(!!"-"5"7"7J'O
 +6+>+JKK	
!/	
 
 
Z 	9:FFF   
 +***	
 	
 	
 	
  "w||BGMM*,E,Ea,H#NNB	 B	 B	 B	 B	 B	 B	 B	 B	 B	H-	 -	 -	 -	 -	 -	^ 	&%%%%%>>xXsW}==|TTT b	= b	= . < g..z::1=G+s++ <qGG:;;;< < < < < < < < < < < < < < <
 %=#//W%%j11!4u<  w''
33A6=H    !O0022    H@ @ @ @       &+o&;&;&=&=    K $*#3#5#55U+:U"5 (#' "* 57 ;:FF&%':*"^!%&7%5( ( ($ ",+&0 4	" " " -==??)>>@@ " 0 0 < <Q ?BW W$77FFFF ;:FF&%':*"^!%&7%5( ( ($ ",+&0 4	" " " -==??)>>@@		7EEE$ 7!w//
;;A>EH X{7H7H7J7JKKKHXu----)+666O O O O< "% * * * * * *!O0022* * * " "
 $ Q 2 #q%+ek*B*B*FMMRRTT    !'D+/< P P.0  l *+ +H
 "* R=*P+ +'Z $7*"^&7	$ $ $  ($%x0) 0	  
 &6688&;;==	!w//
;;A>AUU 33LAAA@@@@@@# H((44Q7:SS   k400 HI!(:;;;!C(F(FGGGH H H H H H H H H H H H H H H  ,|BGMM*,E,Ea,HII#cb	= b	= b	= b	= b	= b	= b	= b	=f +Q+ +'Z $7*"^&7	$ $ $  ($%x0) 0	  
 &6688&;;==			3X>>>$ 	4 -E+77W--j99!<uD 
 !(H!5y(..BRBRSSSHY....)(333# C#OOO!)!5!5!7!7J #E: 6 6Ii// C4"&))++

49w7J+J#KLLL

#5666

6;sL#A#ABBBC C C C C C C C C C C C C C C *c** =aGGDMMMGGA+AAABBBGG;x;;;<<<= = = = = = = = = = = = = = =b	= b	= b	= b	= b	= b	= b	= b	= b	= b	= b	= b	= b	= b	= b	=H s   7;f'2If'I	f'I	Q5f'>\f'\	f'"\	#2f'"Ef'%A0d!f'!d%	%f'(d%	)f'<Aff'f	f'f	f''f+.f+N)
r  rS   rp  rp   r  r  r|   rk   r`   rp   )r   r   r   r  r  rd   rh   rf   r  r    s8        ` ` ` [` ` `rh   r  c                     t          t                    j        dz  } |                                 5 }|                                }t          |d          \  }}d d d            n# 1 swxY w Y   t          |          S )Nr  r  )r   r  rA  r   r  r4  r9   )r   rI  r(  rw   rh  s        rf   rq  rq    s     >> #99D	 
&&((
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 $H---s   (A%%A),A)c                     t                      } t          j                    r#dt          j                            |            dS d|  dS )Nrd  rc  )rq  r$   r  r   r   r  rh  s    rf   
cpp_prefixr    sR      H ( :BG,,X669999'H''''rh   r	  Union[str, List[str]]r  r  r  c                v    t          d          5  t          | ||          cd d d            S # 1 swxY w Y   d S )Nr  )r"   _compile_file)r	  r  r  s      rf   r  r    s     
n	%	% ; ;Zc::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   .22c           	     P   t          | t                    r| gn| }d |D             }	 t          j                    rt	                      }t
          j                            |          }t
          j                            |          }t
          j                            t          d          }t          j                    5 }	t          j        |t
          j                            |	|                     t          j        t          t
          j                            |	d                     t          ||          D ]8\  }
}t          j        |
t
          j                            |	|                     9t
          j                            |	d          }t          j        ||           t#          ||	|          }t
          j                            |          rt          j        |           t          j        ||           d d d            n# 1 swxY w Y   d S d S t)          j        |t(          j                   d S # t(          j        $ rV}|j                            d          }d|v pd|v }|rt4          j        dk    rd	}||z  }t9          j        ||          |d }~ww xY w)
Nc                v    g | ]6}t          j                    rt          j                            |          n|7S rd   )r$   r  r   r   r  )r   ips     rf   r  z!_compile_file.<locals>.<listcomp>  sH       ?A 0 2 2:  rh   includer  )stderrr   z'omp.h' file not foundlibompr_  a  

OpenMP support not found. Please try one of the following solutions:
(1) Set the `CXX` environment variable to a compiler other than Apple clang++/g++ that has builtin OpenMP support;
(2) install OpenMP via conda: `conda install llvm-openmp`;
(3) install libomp via brew: `brew install libomp`;
(4) manually setup OpenMP and set the `OMP_PREFIX` environment variable to point to a path with `include/omp.h` under it.)r  rp   r$   r  rq  r   r   r  r   r  tempfileTemporaryDirectoryr  r   _LINKER_SCRIPTzipcopytreerY   r
  remover  check_outputSTDOUTr  r  r  r~   rd  r%   r  )r	  r  r  input_pathsinput_filesheader_pathheader_namer  torch_includes_pathtmp_dirprI  dest_include_pathoutput_file_pathr  r  openmp_probleminstructions                     rf   r  r    s    #-Z"="=M:,,:K EP  K(6 	C)++K'**;77K'**;77K #%',,{I"F"F,.. ;'Kg{)K)KLLLNBGLL+,N,NOOO[99 = =DAqK27<<#;#;<<<<$&GLL)$D$D! 35FGGG#5c7K#P#P 7>>+.. +Ik***,k:::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; #C
0ABBBBBB( 6 6 6))1V;Qx6?Q 
	"clh662  k!F!#v..A56sD   BI   EHI  HI  HI   I   J%AJ  J%zOptional[CDLL]_libgompr   Union[list[c_void_p], c_void_p]c                   dfdfd|D             }|                      d          sJ | dz               d }t          |                     d                    D ]/\  }}|d	k    rt          j        |          }t          ||          }0t          |          sJ | d
z                || }t          |t          t          f          rP|D ])}t          |t          j                  sJ | dz               *t          j        j                            |          S t          |t          j                  sJ | dz               t          j        j                            |          S )Nargr   r`   c                   t          t          |                     dk    r$t          j        j                            |           S t          | t          t          f          r$ t          |           fd| D                       S | S )Nz<class 'PyCapsule'>c              3  .   K   | ]} |          V  d S rc   rd   )r   rc  convert_args     rf   r   z9custom_op_wrapper.<locals>.convert_arg.<locals>.<genexpr>  s+      99[[^^999999rh   )	rp   r  rz   r  _aoti&alloc_tensor_by_stealing_from_void_ptrr  r  r   )r  r  s    rf   r  z&custom_op_wrapper.<locals>.convert_arg  sy    tCyy>>2228>HHMMMdE]++ 	4999999S999999Jrh   c                &    g | ]} |          S rd   rd   )r   r  r  s     rf   r  z%custom_op_wrapper.<locals>.<listcomp>  s#    7773kk#&&777rh   z
torch.ops.z, can not be called through custom_op_wrapperrt   r   z, can not be loaded through custom_op_wrapperz returns a list of non-tensorsz returns a non-tensor)r  r   r`   r   )
startswith	enumerater  	importlibimport_moduler  callabler  r  r   rz   r    r  r  #unsafe_alloc_void_ptrs_from_tensors!unsafe_alloc_void_ptr_from_tensor)	r   r^   converted_argsfuncr  ro  resultrr  s	           @rf   custom_op_wrapperr    s         8777$777N==&&  
;;   D"((3--((    166*1--DtQD>>NN2 NNNNNT>"F&4-(( H 	V 	VAa..UU5U0UUUUUx~AA&III&%,//MM6M1MMMMx~??GGGrh   c                      e Zd ZU i Zded<    eej                  Zi Zded<   edd
            Z	e
dd            Ze
	 	 	 ddd            Ze
ddd            ZdS )CppCodeCache0Dict[str, Callable[[], Union[CDLL, ModuleType]]]r   r   cpp_compile_command_flagsr   rp   r   r`   Union[CDLL, ModuleType]c                *    t          j        |           S rc   )r   LoadLibrary)r   r   s     rf   _load_library_innerz CppCodeCache._load_library_inner#  s    %%%rh   c           	        	 |                      ||          }||_        |S # t          t          f$ r}dt	          |          v rWt
          j                            d          r8t          j	        d          a
|                      ||          }||_        |cY d }~S dt	          |          v r;t          | dt          j                     dt          j                     d          | d }~ww xY w)Ngompz/usr/lib64/libgomp.so.1z(failed to map segment from shared objectz3.  The most common reason this may occur is if the zl folder is mounted with noexec (e.g., by default Docker mounts tmp file systems as noexec).  Please remount zi with exec enabled, or set another temporary directory with TORCHINDUCTOR_CACHE_DIR environment variable.)r  r   ImportErrorrx  rp   r   r   r
  r   r  r  r  
gettempdir)r  r   r   r  r  s        rf   _load_libraryzCppCodeCache._load_library'  s    	,,T377FFJMW% 	 	 	QBGNN3L$M$M  +,EFF00s;; 
9SVVCC ] ]XM`MbMb ] ]3;3F3H3H] ] ] 
  	s"   ! C,A"C'C,AC''C,FNrd   rp  r|   rk   	submit_fnr   extra_flagsSequence[str]c           	         i  j         |t                      |d}t                       t          ddt	          di |          }t          |                                          }t          |d|          \  } j        vr6ddl	m
}	 t          j                            t                      d	z             }
t          |          \  }}	 |d d
         dz   }d d t	          di |}t          ||||          }t!          j        t$          |
|||          t'          t)          j                    r|n|                                          d fd}|T |	|
t.                    5  t          j                                      s |          d d d            n# 1 swxY w Y   | j        <    j                 S )N)r|   r  r  r  r  r   r  r  r   r2  r4  sorJ  r`   r   c                     >                                               } | J                               J S rc   )r  r  )r  binary_pathr  futurer   r  	worker_fns    rf   load_fnz(CppCodeCache.load_async.<locals>.load_fn}  sR    ;)&Y[[F!>>>++K==C???
rh   r5  rd   r`   r   )r  r:   r1   r3   r5   r.  re  r4  r   rg  r3  r   r   r   r  r8   r   r
   _worker_compile_cppr9   r$   r  rl  rh  r
  )r  rp  r|   r  r  compile_commandcommand_genvec_isa_cmdr	  r3  	lock_pathr  rK  fb_output_pathcpp_build_optioncpp_builderr  r  r  r   r  r  s   `                @@@@@rf   
load_asynczCppCodeCache.load_async>  s   
+
#~~&	
 
 
 	 c/B/U/U_/U/U
 
 
 ;7799::U+FFFZci))))))\^^S7]CCI&LZ&X&X#K (_t3N,0FC2EE_EE$ "%,	  K ")# I 3#%%8 5577 K	 	 	 	 	 	 	 	 	 	 	 $Xi>>> 6 67>>+66 6!*9!5!56 6 6 6 6 6 6 6 6 6 6 6 6 6 6 %CIcNy~s   6+F--F14F1c                >     |                      ||                      S rc   )r  )r  rp  r|   s      rf   r   zCppCodeCache.load  s    0s~~k400222rh   )r   rp   r   rp   r`   r  )FNrd   )
rp  rp   r|   rk   r  r   r  r  r`   r   )F)rp  rp   r|   rk   r`   r   )r   r   r   r   rQ  r   r  r  r  r  r  r  r  r   rd   rh   rf   r  r    s         >@E@@@@,u{++K022222& & & \&    [,  %'P P P P [Pd 3 3 3 3 [3 3 3rh   r  r  r  r3   fb_input_pathr  c           	        ddl m}  || t                    5  t          j                    r|n|                                }t          j                            |          s]t          j                    r6t          ||t          j        |                                                     n|                                 d d d            d S # 1 swxY w Y   d S )Nr   r2  r5  )rg  r3  rh  r$   r  rl  r   r   r
  r  r  r  re  build)r  r  r  r  r3  r  s         rf   r  r    s(    "!!!!!	)\	2	2	2 $ $$.00XNNk6V6V6X6X 	 w~~k** 	$!! $!"K < < > >??    !!###$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   B&CCCc                      e Zd ZU i Zded<    eej                  ZdddZdZ	dZ
dZ ej        d	          Zed"d            Ze	 	 	 	 d#d$d            Zed%d!            ZdS )&CppPythonBindingsCodeCacher  r   FTinclude_pytorchsharedkernelzkernel(%s);Py_RETURN_NONE;ru   a  
        // Python bindings to call %s():
        #define PY_SSIZE_T_CLEAN
        #include <Python.h>
        #include <sstream>
        #include <cstdlib>

        #ifndef _MSC_VER
        #if __cplusplus < 202002L
        // C++20 (earlier) code
        // https://en.cppreference.com/w/cpp/language/attributes/likely
        #define likely(x)       __builtin_expect(!!(x), 1)
        #define unlikely(x)     __builtin_expect(!!(x), 0)
        #endif
        #else
        #define likely(x) (x)
        #define unlikely(x) (x)
        #endif

        // This is defined in guards.cpp so we don't need to import PyTorch headers that are slooow.
        // We manually link it below to workaround issues with fbcode build.
        static void* (*_torchinductor_pyobject_tensor_data_ptr)(PyObject* obj);

        template <typename T> static inline T parse_arg(PyObject* args, size_t n) {
            static_assert(std::is_pointer<T>::value, "arg type must be pointer or long");
            return static_cast<T>(_torchinductor_pyobject_tensor_data_ptr(PyTuple_GET_ITEM(args, n)));
        }
        template <> inline int64_t parse_arg<int64_t>(PyObject* args, size_t n) {
            auto result = PyLong_AsSsize_t(PyTuple_GET_ITEM(args, n));
            if(unlikely(result == -1 && PyErr_Occurred()))
                throw std::runtime_error("expected int arg");
            return result;
        }
        template <> inline uintptr_t parse_arg<uintptr_t>(PyObject* args, size_t n) {
            auto result = PyLong_AsVoidPtr(PyTuple_GET_ITEM(args, n));
            if(unlikely(result == reinterpret_cast<void*>(-1) && PyErr_Occurred()))
                throw std::runtime_error("expected int arg");
            return reinterpret_cast<uintptr_t>(result);
        }

        %s

        static PyObject* %s_py(PyObject* self, PyObject* args) {
            try {
                if(unlikely(!PyTuple_CheckExact(args)))
                    throw std::runtime_error("tuple args required");
                if(unlikely(PyTuple_GET_SIZE(args) != %s))
                    throw std::runtime_error("requires %s args");
                %s
            } catch(std::exception const& e) {
                PyErr_SetString(PyExc_RuntimeError, e.what());
                return nullptr;
            } catch(...) {
                PyErr_SetString(PyExc_RuntimeError, "unhandled error");
                return nullptr;
            }
        }

        static PyMethodDef py_methods[] = {
            {"%s", %s_py, METH_VARARGS, ""},
            {NULL, NULL, 0, NULL}};

        static struct PyModuleDef py_module =
            {PyModuleDef_HEAD_INIT, "%s", NULL, -1, py_methods};

        PyMODINIT_FUNC PyInit_%s(void) {
            const char* str_addr = std::getenv("_TORCHINDUCTOR_PYOBJECT_TENSOR_DATA_PTR");
            if(!str_addr) {
                PyErr_SetString(PyExc_RuntimeError, "_TORCHINDUCTOR_PYOBJECT_TENSOR_DATA_PTR must be set");
                return nullptr;
            }
            std::istringstream iss(str_addr);
            uintptr_t addr = 0;
            iss >> addr;
            _torchinductor_pyobject_tensor_data_ptr =
                reinterpret_cast<decltype(_torchinductor_pyobject_tensor_data_ptr)>(addr);
            return PyModule_Create(&py_module);
        }
        r   rp   r   r`   r   c                   t          t          j        j        j        j                  t          j        d<   | d| j         }	 t          j
        |         S # t          $ r Y nw xY wt          j                            ||          }|J t          j                            |          }|t          j
        |<   |j                            |           |S )N'_TORCHINDUCTOR_PYOBJECT_TENSOR_DATA_PTRrt   )rp   rz   r  _dynamorz  '_torchinductor_pyobject_tensor_data_ptrr   environentry_functionr~   modulesKeyErrorr  utilspec_from_file_locationmodule_from_specloaderexec_module)r  r   r   module_namer  r  s         rf   r  z.CppPythonBindingsCodeCache._load_library_inner	  s    @CH#KA
 A

<= 33s133	;{++ 	 	 	D	~55k4HH0066#)K '''s   A 
A"!A"r   Nrd   argtypesr  rp  r|   rk   num_outputsr  r  r   r  r  c                z   
 d                     d t          |          D                       } j         j         j        r
 j        |z  nd j        t          |          t          |           j        |z   j         j         j         j        f
z  }                     ||z   |||          
dd	 
fd}	|	S )
a5  
        Wrap a C++ function in fast Python bindings.

        Args:
            argtypes: The types of args to ENTRY_FUNCTION(), e.g. ["float*", "long"]
            source_code: C++ source code containing a ENTRY_FUNCTION() function

        Returns:
            A python version of ENTRY_FUNCTION()
        , c              3  V   K   | ]$\  }}d |                     dd           d| dV  %dS )z
parse_arg<zconst ru   z>(args, )N)r}   )r   nargtypes      rf   r   zBCppPythonBindingsCodeCache.load_pybinding_async.<locals>.<genexpr>1	  s\       
 
7 E266DDDDD
 
 
 
 
 
rh   ru   )r  r  Nr`   r   c                 t    !             t          t                    sJ t           j                  S rc   )r  r   r  r  )r  
get_resultr  s   rf   r  z?CppPythonBindingsCodeCache.load_pybinding_async.<locals>.futureF	  s<    ~#!&*5555563#5666rh   r  )r   r  suffix_templater  extra_parse_argr  call_entry_functionr  )r  r
  rp  r|   r  r  r  	parseargssuffixr  r  r  s   `         @@rf   load_pybinding_asyncz/CppPythonBindingsCodeCache.load_pybinding_async	  s	   ( II 
 
'11
 
 
 
 
	 $141DLC+--"MMMM#i/(
 
 ^^& $) $ 
 

 	7 	7 	7 	7 	7 	7 	7 	7 rh   r^   r_   c                ,      | j         |i |            S rc   )r  r  r^   r_   s      rf   load_pybindingz)CppPythonBindingsCodeCache.load_pybindingO	  s#    8's'888:::rh   )r   rp   r   rp   r`   r   )Fr   Nrd   )r
  r  rp  rp   r|   rk   r  r  r  r   r  r  r`   r   )r^   r   r_   r   r`   r   )r   r   r   r   rQ  r   r  r  r  r  r  r  textwrapdedentr  r  r  r  r  rd   rh   rf   r  r    s         >@E@@@@,u{++K !! !
 N6O%hoN	P POd    [  
 %'/ / / / [/b ; ; ; [; ; ;rh   r  c                  p    e Zd ZU i Zded<    eej                  ZdddZdZ	dZ
 ej        d          ZdS )	CppWrapperCodeCacher  r   Tr  inductor_entry_cppzreturn inductor_entry_cpp(%s);a  
        #include <torch/csrc/inductor/aoti_torch/c/shim.h>

        static inline std::vector<AtenTensorHandle> unpack_tensor_handle_list(PyObject* pyvec) {
            std::vector<AtenTensorHandle> result;
            size_t result_len = PyList_GET_SIZE(pyvec);
            result.reserve(result_len);
            for (size_t i = 0; i < result_len; i++) {
                // AtenTensorHandle is essentially a pointer
                void* elem = PyCapsule_GetPointer(PyList_GET_ITEM(pyvec, i), NULL);
                result.push_back(reinterpret_cast<AtenTensorHandle>(elem));
            }
            return result;
        }

        static inline PyObject* pack_tensor_handle_list(const std::vector<AtenTensorHandle>& cppvec) {
            size_t result_len = cppvec.size();
            PyObject* result = PyList_New(static_cast<Py_ssize_t>(result_len));
            for (size_t i = 0; i < result_len; i++) {
                PyObject *elem =
                    cppvec[i] == nullptr
                        ? Py_None
                        // Store AtenTensorHandle as PyCapsulate
                        : PyCapsule_New(reinterpret_cast<void*>(cppvec[i]), NULL, NULL);
                PyList_SET_ITEM(result, i, elem);
            }
            return result;
        }

        template <> inline std::vector<AtenTensorHandle> parse_arg<std::vector<AtenTensorHandle>>(PyObject* args, size_t n) {
            return unpack_tensor_handle_list(PyTuple_GET_ITEM(args, n));
        }

        PyObject* inductor_entry_cpp(std::vector<AtenTensorHandle>&& input_handles) {
            // For outputs, we only allocate a vector to hold returned tensor handles,
            // not allocating the actual output tensor storage here
            std::vector<AtenTensorHandle> output_handles(%s);
            try {
                inductor_entry_impl(input_handles.data(), output_handles.data());
                return pack_tensor_handle_list(output_handles);
            } catch(std::exception const& e) {
                PyErr_SetString(PyExc_RuntimeError, e.what());
                return {};
            } catch(...) {
                PyErr_SetString(PyExc_RuntimeError, "unhandled error");
                return {};
            }
        }
        N)r   r   r   r   rQ  r   r  r  r  r  r  r  r  r  rd   rh   rf   r   r   T	  sj         >@E@@@@,u{++K! ! *N:%ho0	2 2OOOrh   r   c                  8   e Zd ZU i Zded<    eej                  ZdZded<    e	j
        d          Ze e	j
        d          z   Ze e	j
        d          z   Z e	j
        d	          Zed'd            Zed(d            Ze ej        d          d)d                        Zed*d            Ze ej        d          d+d                        Ze ej        d          d+d                        Ze	 d,d-d"            Zed.d%            Zed)d&            ZdS )/HalideCodeCachez0Dict[str, Callable[[], Union[ModuleType, CDLL]]]r   Nr  _standalone_runtime_patha  
        #include "{halideruntime_h}"
        #include "{headerfile}"
        #include <stdexcept>
        #include <cmath>

        namespace c10 {{
            inline long div_floor_integer(long a, long b) {{
                if ((a<0) != (b<0)) {{
                    const auto quot = a / b;
                    const auto rem = a % b;
                    return rem ? quot - 1 : quot;
                }}
                return a / b;
            }}
        }}
        z
        void kernel({argdefs}) {{
            {buffers}
            int err = halide_kernel({buffer_names});
            if(err != 0) throw std::runtime_error("halide_kernel failed");
        }}
        a{  
        #include <cuda.h>
        static const halide_device_interface_t* cuda_interface = halide_cuda_device_interface();

        void kernel({argdefs}, uintptr_t stream) {{
            {buffers}
            int err = halide_kernel(reinterpret_cast<void*>(stream), {buffer_names});
            if(err != 0) throw std::runtime_error("halide_kernel failed");
        }}
        a  
        #include "{}"
        #include <cuda.h>

        static int acquire_context(void* user_context,
                                   void** cuda_context_out,
                                   bool create) {{
            return cuCtxGetCurrent(reinterpret_cast<CUcontext*>(cuda_context_out));
        }}

        static int release_context(void* user_context) {{
            return 0;
        }}

        static int get_stream(void* user_context,
                              void* cuda_context,
                              void** stream_out) {{
            *stream_out = user_context;
            return 0;
        }}

        static int register_halide_hooks() {{
            halide_set_cuda_acquire_context(&acquire_context);
            halide_set_cuda_release_context(&release_context);
            halide_set_cuda_get_stream(&get_stream);
            return 0;
        }}

        int inductor_register_halide_hooks_result = register_halide_hooks();
        ro   rp   r  rU   r|   rk   r`   r  c                Z   |j         J |j        *t          |j                   t          |j                  k    sJ |j        J |j        p|j         d|j         }|rd| d}d}d}d}nd}d}d| d}d	}g }	t          |j         |j                  D ]!\  }
}|	                    d
|
 d| d           "d| dd| dd                    |	           d| d| d| d| d| d| d| d| d| d|	                                 d| dt          |	           d| d| d| dg
S )Nz + zreinterpret_cast<uint64_t>(r  cuda_interfacenullptrhalide_buffer_flag_device_dirty0zreinterpret_cast<uint8_t*>(halide_buffer_flag_host_dirtyzhalide_dimension_t(0, r  zhalide_buffer_t ;zhalide_dimension_t z_dims[] = {z};z
.device = z.device_interface = z.host = z	.flags = z.type = z.dimensions = z.dim = z_dims;z.padding = nullptr;)
shapestrider  offsetalias_ofro   r  r  r   halide_type)r  ro   r  r|   rU  r   device_interfacehostflagsdimssizer-  s               rf   _codegen_bufferzHalideCodeCache._codegen_buffer	  s   y$$$z%#ci..C
OO*K*K*K*Kz%%%l.ch??3:?? 		4>8>>>F/D5EEF(<<<<D3E	3:66 	D 	DLD&KKBBBBBBCCCC 't&&&H$HHDIIdOOHHH((v(((<<)9<<<$$T$$$&&e&&&11S__..111//3t99///((D((((((
 	
rh   r_  rV   
headerfileobjectc           	        |                                 }|d|j        v u sJ d|j        v sJ g }g }t          |j                  D ]\  }}|                                rF|                    d|            |                    |                     d| ||                     _d|j        vsJ |                    |j	                   d
                    d |D                                                       }|r| j        n| j        }|                    |                     |rdnd	          |d

                    d |j        D                       |d

                    |                    }	|	S )Nuser_context
no_runtimez&hl_buf_hl_buf_*r  c                    g | ]}d | S )    rd   )r   lines     rf   r  z1HalideCodeCache._codegen_glue.<locals>.<listcomp>
  s    ???t]D]]???rh   HalideRuntimeCuda.hzHalideRuntime.hr  c              3  `   K   | ])}|j         	|                                 d|j         V  *d S )Nr  )r/  bindings_typero   )r   rc  s     rf   r   z0HalideCodeCache._codegen_glue.<locals>.<genexpr>
  sP        :% ??$$//qv//%%%% rh   )halideruntime_hr7  argdefsbuffersbuffer_names)r:  r  r  r
  	is_bufferr  extendr6  ctypero   r   lstripglue_template_cudaglue_template_cppformatfind_header)
r  r_  r7  r:  rF  rG  r  r  glue_template	glue_codes
             rf   _codegen_gluezHalideCodeCache._codegen_glue
  s   ,,..>T[89999t{****.. 	. 	.FAs}} .##NqNN333s22=Q==#wOOPPPP#)++++##CH----))??w???@@GGII29T..s?T!((OO)0G%%6G  "II      
 <00 ) 
 
	 rh   c                    t          ddt                                }|                                }t          d                    | j        | j        | j        |g                              d                    S )NOIr   r  r   )	r3   r4   re  r  r   rM  rL  standalone_runtime_cuda_initr   )r  r  command_lines      rf   config_hashzHalideCodeCache.config_hash#
  s     !"
 
 

 #3355II)*4 	  fWoo	
 	
 		
rh   r  errmsgc                b   t           j        j                            d          }||j        st          d          	 |j        d         }t          j        |          D ]}|                    d          r	 t          j
        dt          j                            ||          g          }n# t          j        $ r Y `w xY wt          j        d|                    d                    }|rt          j                            t          j                            |                    d                    |           }t          j                            |          r!t          j                            |          c S  n"# t(          $ r}t          |          |d }~ww xY wt          |          )	Nhalidez$halide python bindings not installedr   r  lddz(/.*)/libHalide.sor   r+   )r  	machinery
PathFinderr  r  r   r   rY  r  r  r  r   r   SubprocessErrorrr  searchr  abspathgroupr
  r[  )	r  rY  r  r`  fileoutmr   r  s	            rf   _search_for_filez HalideCodeCache._search_for_file7
  s   "-77AA<t><EFFF	.4Q7F
6** 9 9=='' 9!(5"BGLL$>$>?  &5 ! ! ! !	"7G9L9LMMA 9!w||BGOOAGGAJJ,G,GPP7>>$// 9#%7??4#8#88889  	. 	. 	.v&&A-	.6"""sB   :F  94B.-F  .C =F  ?C  B<F  =F   
F
FFc                2   d|                                   d}dt          j        v rQt          j                            t          j        d         |          }t          j                            |          r|S d| d}t                              ||          S )Nlibautoschedule_r  
HALIDE_LIBCan't find z3, set env HALIDE_LIB to the directory containing it)r  r   r   r   r   r
  r#  rf  )ro   sofiler   rY  s       rf   find_libautoschedulez$HalideCodeCache.find_libautoscheduleO
  s     6DJJLL5552:%%7<<
< 8&AADw~~d## U&UUU 	 //???rh   c                   dt           j        v rQt           j                            t           j        d         |           }t           j                            |          r|S dt           j        v rqt           j                            t           j                            t           j        d         d|                      }t           j                            |          r|S d|  d}t                              d|  |          S )NHALIDE_INCLUDEri  z../include/rj  z7, set env HALIDE_INCLUDE to the directory containing it)r   r   r   r   r
  ra  r#  rf  )ro   r   rY  s      rf   rO  zHalideCodeCache.find_header\
  s     rz))7<<
+; <dCCDw~~d## 2:%%7??RZ57KT7K7KLL D w~~d## W$WWW 	 //0Dd0D0DfMMMrh   rp  r  r   Callable[[], Any]c                R   t          t          t          |t          |                                 |f                    d          d                   }t          j        |d           d t          |dz            }t          |dz            }t          |dz            }t          |d	z            }t          |d
z            }	t
          j        	                    |           }
g }|
rt          ||           t          j        |ddd| ddddg
}|j        r/|                    d|                     |j                  g           |                    |                                           |                    t%          j        t(          j        |                     d |j        D             }|                                r|                    d           |                     ||                     ||          ||                                 f|
r|j        nd |                                          |
re|                    t%          j        t6          |                     t%          j        t8          |	|          }|r ||          j        n
 |             dfd}|S )Nr  r[  r`  Trx   zgenerate_kernel.pyzhalide_kernel.azhalide_kernel.hdoner~  -gr  -oz-fhalide_kernelz-ezstatic_library,h,schedulez-pc                D    g | ]}|j         	|                                S rc   )r/  rC  )r   r  s     rf   r  z9HalideCodeCache.generate_halide_async.<locals>.<listcomp>
  s2     
 
 
$'CL<PC<P<P<Prh   	uintptr_t)r  r  r|   r`   ro  c                 0    r
                            S rc   rd   )bindings_futurewait_for_compiles   rf   r   z3HalideCodeCache.generate_halide_async.<locals>.load
  s(     #  """"?$$$rh   )r`   ro  )r   r'  r  r.  rX  r   r   rp   r   r
  r   r~   
executable	schedulerrI  rl  r^   r  r   r
   r  r  r
  r:  r  rR  build_standalone_runtimetouch_worker_task_halider  )r  r_  rp  r  dirpathgenfilelibfiler7  donefilelockfileneed_compilejobsr  binding_typestaskr   rx  ry  s                   @@rf   generate_halide_asyncz%HalideCodeCache.generate_halide_asyncn
  s     1 14899     
 
 	Gd++++g 4455g 1122#4455
w'((w'((7>>(333 	G+...+C ~ M

D#":":4>"J"JKLLLJJtyy{{###KK	)**?EEFFF
 
+/=
 
 
 <<>> 	.  ---22dJ// #">">"@"@A%1;dkkt 3 
 
  	KK	)%::;;;$%8(DIID #,9T??#9  	% 	% 	% 	% 	% 	% 	%
 rh   r^   r_   c                ,      | j         |i |            S rc   )r  r  s      rf   generate_halidezHalideCodeCache.generate_halide
  s#    9(s($9&99;;;rh   c           	        | j         r+t          j                            | j                   r| j         S t          j                                        }d}|rdnd}| j         r5t          j                            | j                   rJ t                      }nt                      }t          |          d| d| 
                                 z  }t          j        |d           t          |dz            }t          |d	z            }t          |d
z            }t          |dz            }	t          ||z            }
t          j                            |          sjdd l}dd l}|                    |t                     5  t          j                            |          st#          |d          5 }|r@|                    | j                            |                     d                               d d d            n# 1 swxY w Y   |                    |	|                    |                     t1          |
          \  }}t3          |||	g|t5          |                    }t7          j        t;          j        |                                                     tA          |           d d d            n# 1 swxY w Y   t          j                            |
          sJ |
| _         |
S )NzlibStandaloneHalideRuntime.soz	host-cudar2  zhalide-runtime--Trx   rq  r~  z	hooks.cppzstandalone_halide_runtime.ar   r>  rA  r|   rJ  )!r$  r   r   r
  rz   r|   r1  rB   rA   r   rX  r   rp   rg  r[  r3  rh  r   r4  rV  rN  rO  compile_standalone_runtimeTargetr8   r3   r5   r  r  r  r  re  r}  )r  r:  libnamer  baser  r  r  hookfileafilerk  rg  hlrI  ro   rK  halide_cmd_gens                    rf   r|  z(HalideCodeCache.build_standalone_runtime
  s[   ' 	0BGNN(-
 -
 	0 //*))++1 '3V' 	w~~c&BCCCCC
 %&&DD;;Dt**MMM#//:K:KMMM
Gd++++w'((w'((w,--G;;<<Ww&''w~~h'' 	$OOO""8\:: $ $w~~h// $h,, " GG # @ G G$'OO4I$J$J!" !"                 11%69J9JKKK'Mf'U'U$D*%/!!)5 1#-$7!(% % %	& & &N )N$C$C$E$EFF   (OOO1$ $ $ $ $ $ $ $ $ $ $ $ $ $ $2 w~~f%%%%%'-$s8    1J<1AH 4J< H	J<H	B(J<<K K )ro   rp   r  rU   r|   rk   r`   r  )r_  rV   r7  r8  r`   rp   r  )r  rp   rY  rp   r`   rp   ro   rp   r`   rp   rc   )r_  rV   rp  rp   r  r   r`   ro  )r^   r   r_   r   r`   ro  )r   r   r   r   rQ  r   r  r  r$  r  r  r  rM  rL  rV  r  r6  rR  r   r   rX  rf  rl  rO  r  r  r|  rd   rh   rf   r#  r#  	  sA        >@E@@@@,u{++K.22222X_	 F& 	" "   /(/		# #  $38?	$ $ B 
 
 
 [
B    [> Y
 
 
  [
$ # # # \#. Y	@ 	@ 	@  \	@ YN N N  \N  BFB B B B [BH < < < [< 7 7 7 [7 7 7rh   r#  r  r  List[partial[Any]]c                   ddl m} 	  || t                    5  |D ]} |             	 d d d            d S # 1 swxY w Y   d S # t          j        $ r}t
          j                            d          dk    r[t          |dd          ^}}}t
          j	        
                    |                              d          rt          |                                          }d}	|                    |	          d	k    sJ  G d
 d          }
 |
            ||                    d          d	z   <   t!          j        t!          j        ddg|d          d          }|                    |	|          }t          dd          5 }|                    |                                           d d d            n# 1 swxY w Y   t-          d|           | d }~ww xY w)Nr   r2  HALIDE_REPRO1r  )ru   ru   ru   pythonz    hl.main()r+   c                      e Zd ZddZdS ) _worker_task_halide.<locals>.Outr`   rp   c                    dS )Nrd  rd   r   s    rf   __repr__z)_worker_task_halide.<locals>.Out.__repr__  s    $urh   Nr  )r   r   r   r  rd   rh   rf   Outr    s(        % % % % % %rh   r  rs  z                        import sys, tempfile
                        with tempfile.TemporaryDirectory() as out:
                            sys.argv = zrepro.pyz?
                            hl.main()
                        r?  r>  zwrote repro.py: )rg  r3  rh  r  r_  r   r   r   r  r   r  r  r   r  countindexr  r   r  r}   r4  rK  r   )r  r  r3  jobr  r  scriptr  r  mainr  replfds                rf   r~  r~  
  s   !!!!!! Xh-- 	 	  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 %   :>>.))S00#*1e\#B#B FFSw''228<< BF||((**&zz$''1,,,,% % % % % % % % ,/355CIIdOOa'(O *4(:c(:    
 
 ||D$//*c** ,bHHT[[]]+++, , , , , , , , , , , , , , ,"#9a#9#9::A9sW   A 7A ;A ;A GD-G(F5)G5F9	9G<F9	=GGrh  c                J    t          | d                                           d S )Nrc  )r   closer  s    rf   r}  r}    s$    3rh   c                      e Zd ZU i Zded<   i Zded<    eej                  Ze	ddd            Z
e		 	 	 ddd            Ze		 	 ddd            Ze	 ej        d          d d                        ZdS )!rt  zDict[str, ModuleType]r   z Dict[str, List[Tuple[Any, ...]]]linemapsru   rp  rp   r  r`   r0  c                &    t          |d|          S Nrv   r  r8  )r  rp  r  s      rf   r4  zPyCodeCache.write"  s    [$e4444rh   Nlinemapr  attrsr   r   c                \    t          |d|          \  }}|                     ||||          S r  )r4  ru  )r  rp  r  r  r  r   r   s          rf   r   zPyCodeCache.load&  s6     +t5999	T##Cw>>>rh   r   r   c                j   |g }|| j         vrt          ||          }| j                             ||           t          t	          |           | j        |<   |+|                                D ]\  }}t          |||           |s"|s t          j	        t          ||          |_        | j         |         S rc   )r   r?   r   r  r  r  r  setattrr   r
   r@   _reload_in_subproc)r  r   r   r  r  modr  r  s           rf   ru  zPyCodeCache.load_by_key_path1  s     ?Gci'T22C I  c***!%c7m!4!4CL !KKMM ' 'DAqCA&&&& u )2):4c4* *& y~rh   linenor  Optional[List[Dict[str, Any]]]c                    || j         vrd S | j         |         \  }}t          ||          }|dk    rd S ||dz
           }|sd S dd} ||          S )	Nr   r+   stack_tracerp   r`   List[Dict[str, Any]]c                b    d}t          j        ||           }d t          |          D             S )Nz"File "(.+)", line (\d+), in (.+)\nc                <    g | ]\  }}}|t          |          |d S ))rh  r@  ro   )r  )r   rI  lr  s       rf   r  zPPyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace.<locals>.<listcomp>c  s>       Aq! A::  rh   )rr  findallreversed)r  regexmatchess      rf   parse_stack_tracez<PyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace^  sC     :Ej44G '00   rh   )r  rp   r`   r  )r  r   )r  r   r  r  r  r  entryr  s           rf   stack_frames_for_codez!PyCodeCache.stack_frames_for_codeN  s    
 s|##4|D)u''664a!e 	4	 	 	 	 ! '''rh   ru   )rp  rp   r  rp   r`   r0  )ru   NN)
rp  rp   r  rp   r  r  r  r   r`   r   )NN)
r   rp   r   rp   r  r  r  r   r`   r   )r   rp   r  r  r`   r  )r   r   r   r   rQ  r  r   r  r  r  r4  r   ru  r   r   r  rd   rh   rf   rt  rt    s        #%E%%%%13H3333,u{++K5 5 5 5 [5  37*.? ? ? ? [? 
 48*.    [8 Y( ( (  [( ( (rh   rt  c                  &    e Zd Zedd            ZdS )	TritonCodeCachekernel_namerp   rp  r`   r   c                R    t          t                              |          |          S rc   )r>   rt  r   )r  r  rp  s      rf   r   zTritonCodeCache.loadl  s     '(8(8(E(E{SSSrh   N)r  rp   rp  rp   r`   r   )r   r   r   r  r   rd   rh   rf   r  r  k  s8        T T T [T T Trh   r  r  c                 Z   t          j        t          j        j                  rt          j        j        S t          j                    r2t          j                            t          j                    dd          S t          j        t          j
        d                    rt          j
        dd          S t          j        t          j
        d                    rPt          j                            t          j                            t          j
        dd          d                    S dS )Nr  nvccCUDACXXru   	CUDA_HOMEzbin/nvcc)r'   
nvcc_existr$   r|   cuda_cxxr  r   r   r   rX   getenvrealpathrd   rh   rf   _cuda_compilerr  q  s    6;/00 ${## ?w||K,..v>>>29Y//00 (yB'''29[1122 VwRY{B-G-G T TUUU6rh   c            	     f   t          j                    rddlm}  |                     d          }nt           j        j        }t          j        	                    t          j        
                    |d                    t          j        	                    t          j        
                    |d                    t          j        	                    t          j        
                    |d                    t          j        	                    t          j        
                    |d                    gS )Nr   r  zcutlass-3-headersr  ztools/library/includeztools/library/srcztools/util/include)r$   r  r  r  get_dir_pathr|   cutlass_dirr   r   r  r   )r  cutlass_paths     rf   _cutlass_include_pathsr  }  s     /$$$$$$++,?@@{. 	lI>>??
l4KLLMM
l4GHHII
l4HIIJJ rh   c                 ~   t                       ddlm}  |                     d          t	          j        d          gz   }g }t                      r]t          |           |D ] }|                    d| dd| g           !|	                    d	           |	                    d
           nt          d          |S )Nr   )cpp_extensionTr  LIBDIRz-Lz-Xlinkerz-rpath=z-lcudaz-lcudartzMUnsupported env, failed to find cuda libs! Currently only Linux is supported.)r1   torch.utilsr  library_paths	sysconfigget_config_varrG   r2   rI  r  NotImplementedError)r  lpathsextra_ldflagsr   s       rf   _cuda_lib_optionsr    s    ))))))((d(33 **7 F  "Mzz 
f%%% 	N 	ND   +t++z;KT;K;K!LMMMMX&&&Z((((![
 
 	
 rh   c                 
    g dS )N)z-fPICz-fno-strict-aliasingz-fvisibility=hiddenz-Wconversionrd   rd   rh   rf   _nvcc_host_compiler_optionsr    s       rh   c            	        t          j                    } | dk    rd} d|  d|  g}t          j        j        r	|d|  gz  }dddd	|  d
d                    |           dt          j        j        dddg}t          j                    rE|                    dt          j
                            t          j                              g           t          j        j        r|                    g d           t          j        j        r|                    g d           t          j        j        r|                    ddg           |S )N9090asm_compute_lto_z-t=0z"-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1z-wz-gencode=arch=compute_z,code=[,]z
-std=c++17z--expt-relaxed-constexprz-DNDEBUGz-ccbin)z	-lineinforr  z-DCUTLASS_DEBUG_TRACE_LEVEL=1)z--keepz,--ptxas-options=--warn-on-local-memory-usagez --ptxas-options=--warn-on-spillsz--resource-usagez--source-in-ptxz--use_fast_mathz -DCUTLASS_USE_TANH_FOR_SIGMOID=1)r'   get_cuda_archr$   r|   enable_cuda_ltor   compile_opt_levelr  rI  r   r   r  rX   gccenable_debug_infoenable_ptxas_infouse_fast_math)archr  optionss      rf   _nvcc_compiler_optionsr    sh   !##Dt||$LL+T++,D{"  ,???chhtnn???%"	G  G"'//+/2C2C"D"DEFFF{$ MKKKLLL{$ 	
  	
 	
 	
 {  
!2	
 	
 	
 Nrh   	src_filesdst_filedst_file_ext
extra_argsOptional[List[str]]c                   |g }t                      }t                      }t                      }t                      }||z   d |D             z   d |D             z   |z   }d                    |           }	d}
|dk    r-t                       dd                    |           d| d|	 }
n|dk    rB|                    d           t                       dd                    |           d	| d|	 }
nF|d
k    r-t                       dd                    |           d	| d|	 }
nt          d| d          t          	                    d|
           |
S )Nc                *    g | ]}d |v rd| nd| S )=z-Xcompiler z-Xcompiler=rd   )r   opts     rf   r  z(cuda_compile_command.<locals>.<listcomp>  sG     
 
 
 $'#::#3F3F3F
 
 
rh   c                    g | ]}d |z   S )z-Ird   )r   r   s     rf   r  z(cuda_compile_command.<locals>.<listcomp>  s    
1
1
144$;
1
1
1rh   r  ru   r  r&  r  z-sharedz -o exezUnsupported output file suffix !zCUDA command: %s)
r  r  r  r  r   r  r  r  r  r  )r  r  r  r  include_pathscuda_lib_optionsnvcc_host_compiler_optionsnvcc_compiler_optionsr  src_fileress              rf   cuda_compile_commandr    s    
*,,M(**!<!>!>244
	
 
1
 
 
	
 2
1=
1
1
1	2 	  xx	""H
Cs!!SSCHHW$5$5SShSSSS			y!!!!!PPCHHW$5$5PP8PPhPP			!!PPCHHW$5$5PP8PPhPP!"SL"S"S"STTTII #&&&Jrh   c                  J    e Zd ZdZddZddZddZddZddZddZ	ddZ
dS )
DLLWrapperz A wrapper for a dynamic library.lib_pathrp   r`   ra   c                b    || _         d| _        t          j        |          | _        d| _        d S )NFT)r  is_openr   r  DLL)r   r  s     rf   r   zDLLWrapper.__init__  s/     !#H--rh   c                N    | j         r|                                  d| _         d S d S rm   )r  _dlcloser   s    rf   r  zDLLWrapper.close  s.    < 	!MMOOO DLLL	! 	!rh   c                J   d }t                      rFt          d           }t          |d          st          d          }t          |d          r|j        }n@t	                      r#dd l}|                    dd          }|j        }nt          d          |pt                      r$t          g|_	         || j
        j                   d S t	                      r.dd l}ddlm} |j        g|_	         || j
        j                   d S d S t                              d	           d S )
Ndlclosezlibc.sor   kernel32T)use_last_errorz&Unsupported env, failed to do dlclose!)wintypeszKdll unloading function was not found, library may not be unloaded properly!)rG   r   r]  r  rH   rS  FreeLibraryr  r   r
  r  _handler  HMODULEr  r  )r   	f_dlclosesymsrS  r  r  s         rf   r  zDLLWrapper._dlclose  sP   	:: 	P::D4++ 'ItY'' ) L	\\ 	PMMM{{:d{CCH ,II%&NOOO zz ,&.Z	"	$(*+++++ ,++++++&.&6%7	"	$(*+++++, , KK]    rh   ro   Callable[..., None]c                z    | j         st          d| j                   t          | j        |          dfd}|S )NzCannot use closed DLL library: r^   r   r`   ra   c                 D     |  }|rt          dj                   d S )NzError in function: )r   r   )r^   errmethods     rf   _wrapped_funcz-DLLWrapper.__getattr__.<locals>._wrapped_func6  s<    &$-C L"#J#J#JKKKL Lrh   r^   r   r`   ra   )r  r   r  r  r  )r   ro   r   r  s      @rf   __getattr__zDLLWrapper.__getattr__0  sd    | 	RPPPQQQ4((	L 	L 	L 	L 	L 	L
 rh   c                    | S rc   rd   r   s    rf   	__enter__zDLLWrapper.__enter__=  s    rh   r^   r   c                .    |                                   d S rc   r  )r   r^   s     rf   __exit__zDLLWrapper.__exit__@      

rh   c                .    |                                   d S rc   r&  r   s    rf   __del__zDLLWrapper.__del__C  r(  rh   N)r  rp   r`   ra   r   )ro   rp   r`   r  )r`   r
  r!  )r   r   r   rP  r   r  r  r"  r$  r'  r*  rd   rh   rf   r
  r
    s        **   ! ! ! !
! ! ! !F              rh   r
  c                      e Zd ZU ej         G d d                      Zi Zded<    eej	                  Z
dZedd            Ze	 ddd            Zedd            ZdS )CUDACodeCachec                  $    e Zd ZU ded<   ded<   dS )CUDACodeCache.CacheEntryrp   r	  r  Nr   r   r   rQ  rd   rh   rf   
CacheEntryr.  I  '         rh   r0  Dict[str, CacheEntry]r   rs   rp  rp   r  r`   r0  c                |    t          t          dgd|                    }t          || j        |          \  }}||fS z
        Writes source code into a file with dst_file_ext as the file extension.
        Returns the hash key of source code, and the path to the file.
        dummy_inputdummy_outputr  )r.  r  r4  _SOURCE_CODE_SUFFIXr  rp  r  cuda_commandr   r	  s         rf   r4  zCUDACodeCache.writeR  T      -.,OO
 
  0
 
 
Z Jrh   Nr  r  r"  c                   |                      ||          \  }}|| j        vrddlm} t	                      } |t
          j                            ||dz             t                    }|5  |dt          | j
                            |z   }	t
          j                            |	          st          |g|	||          }
t                      }t                              d|
           |
                    d          }	 t#          j        |t"          j        t
          j                   n2# t"          j        $ r }t-          j        ||j                  |d}~ww xY wt                      }d	||z
   d
|
 }t                              |           nt                              d|           t4                              ||	          | j        |<   ddd           n# 1 swxY w Y   | j        |         j        ||fS )z
        Compiles CUDA source_code into a file with dst_file_ext extension.
        Returns a tuple of dst_file_path, hash_key, source_code_path
        r   r2  r4  r5  NzCUDA Compilation: %sr  )r  envzCUDA Compilation took  seconds. Compile command: z8CUDA Compilation skipped: %s since output already exists)r4  r   rg  r3  r  r   r   r   rh  r  r7  r
  r  r   r  r  r  r  r  r  r   r  r%   CUDACompileErrorr  r5  r,  r0  r  )r  rp  r  r  r   r	  r3  r  r~  r  r  r  	cmd_partsry  end_timelog_duration_msgs                   rf   r  zCUDACodeCache.compilea  sV    ))K>>Zci))))))#~~H8BGLL3=AA<XXXD S S()HC0G,H,H+H)HILXw~~k22 .#k< C "&JII4c::: #		#IW"/%j.?RZ     &8 W W W!29elKKQVVW#vvH'w:@U'w'wru'w'w$HH-....IIR"   "/!9!9*k!R!R	#/S S S S S S S S S S S S S S S2 	#*C<<s8   -BG?+D+*G+E:EEA5GG"GTuple[DLLWrapper, str, str]c                    |dk    rt          d| d|           |                     ||          \  }}}t          |          ||fS z
        Compiles source code and loads the generated .so file.
        Returns a tuple of DLLWrapper, hash_key, source_code_path
        r  zCOnly support loading a .so file for now. Requested file extension: z. Source code: r   r  r
  r  rp  r  dst_file_pathr   source_code_paths         rf   r   zCUDACodeCache.load       4X-9X XJUX X   58KK5
 5
1x!1 =))85EFFrh   rp  rp   r  rp   r`   r0  rc   rp  rp   r  rp   r  r  r`   r"  rp  rp   r  rp   r`   rB  )r   r   r   r^  	dataclassr0  r   rQ  r   r  r  r7  r  r4  r  r   rd   rh   rf   r,  r,  G  s                 $&E%%%%,u{++K   [ TX&= &= &= &= [&=P G G G [G G Grh   r,  c                      e Zd ZU ej         G d d                      Zi Zded<    eej	                  Z
dZdZedd            Ze	 ddd            Zedd            ZdS )ROCmCodeCachec                  $    e Zd ZU ded<   ded<   dS )ROCmCodeCache.CacheEntryrp   r	  r  Nr/  rd   rh   rf   r0  rQ    r1  rh   r0  r2  r   r  Frp  rp   r  r`   r0  c                |    t          t          dgd|                    }t          || j        |          \  }}||fS r4  )r.  r(   r4  r7  r8  s         rf   r4  zROCmCodeCache.write  r:  rh   Nr  r  r"  c                   | j         sGd| _         t                              t          t	          t                                                     |                     ||          \  }}|| j        vrddlm	} t                      } |t          j                            ||dz             t                    }|5  |dt          | j                            |z   }	t          j                            |	          st%          |g|	||          }
t'                      }|
                    d          }	 t+          j        |t*          j        dt          j                  }t                              d	|           n2# t*          j        $ r }t5          j        ||j                  |d}~ww xY wt'                      }d
||z
   d|
 }t                              |           nt                              d|           t<                              ||	          | j        |<   ddd           n# 1 swxY w Y   | j        |         j         ||fS )z
        Compiles source_code into a file with dst_file_ext extension,
        using the compile command specific for the ROCm platform.
        Returns a tuple of dst_file_path, hash_key, source_code_path
        Tr   r2  r4  r5  Nr  )r  r5  r<  zCompilation output: %szCompilation took r=  z3Compilation skipped: %s since output already exists)!_logged_compiler_versionr  r  r6   rp   r)   r4  r   rg  r3  r  r   r   r   rh  r  r7  r
  r(   r   r  r  r  r  r   r  r%   r>  r  r5  rO  r0  r  )r  rp  r  r  r   r	  r3  r  r~  r  r  r  r?  r  ry  r@  rA  s                    rf   r  zROCmCodeCache.compile  s    + 	G+/C(II/MOO0D0DEEFFF))K>>Zci))))))#~~H8BGLL3=AA<XXXD S S()HC0G,H,H+H)HILXw~~k22 .#k< C "&J #		#I	W!+!8%#-#4!% "
	" " " 		":FCCCC%8 W W W!29elKKQVVW#vvH'r8j;P'r'rmp'r'r$HH-....IIM"   "/!9!9*k!R!R	#5S S S S S S S S S S S S S S S8 	#*C<<s9   ;A6H*2AE:9H*:F)	F$$F))A5H**H.1H.rB  c                    |dk    rt          d| d|           |                     ||          \  }}}t          |          ||fS rD  rE  rF  s         rf   r   zROCmCodeCache.load  rI  rh   rJ  rc   rK  rL  )r   r   r   r^  rM  r0  r   rQ  r   r  r  r7  rT  r  r4  r  r   rd   rh   rf   rO  rO    s                 $&E%%%%,u{++K$   [ TX.= .= .= .= [.=` G G G [G G Grh   rO  c                      e Zd ZddZdS )CodeCacheFuturer`   ra   c                    t           rc   )r  r   s    rf   r  zCodeCacheFuture.result  s    !!rh   Nr   )r   r   r   r  rd   rh   rf   rW  rW    s(        " " " " " "rh   rW  c                  *    e Zd ZU ded<   ddZdd	Zd
S )TritonFuturer   r  r   r  Optional[Future[Any]]r`   ra   c                "    || _         || _        d S rc   )r  r  )r   r  r  s      rf   r   zTritonFuture.__init__  s    
 rh   c                    | j         =| j                                         }|J d | _         | j                                         | j        S rc   )r  r  r  
precompile)r   r  s     rf   r  zTritonFuture.result
  sI    ;"[''))F>>>DKK""$$${rh   N)r  r   r  r[  r`   ra   )r`   r   )r   r   r   rQ  r   r  rd   rh   rf   rZ  rZ    sJ                 rh   rZ  c                      e Zd ZddZd	dZdS )
LambdaFuture	result_fnr  r`   ra   c                    || _         d S rc   ra  )r   ra  s     rf   r   zLambdaFuture.__init__  s    "rh   c                *    |                                  S rc   rc  r   s    rf   r  zLambdaFuture.result  s    ~~rh   N)ra  r  r`   ra   )r`   r  )r   r   r   r   r  rd   rh   rf   r`  r`    s<        # # # #           rh   r`  )r^   r   r_   r   r`   ra   )r`   rk   r  r  )r  r  r`   rp   r  )r  r  r  rp   r`   rp   )r  rp   r   rp   r!  rp   r`   r"  )ru   r  )r(  r  r  rp   r)  rp   r`   rp   )ru   r  ru   )r(  r  r   rp   r  rp   r)  rp   r!  rp   r`   r0  )r5  rp   r`   rp   )FF)
r:  rp   r(  r  r   rk   r3  rk   r`   ra   )rR  r-   r`   r-   )rV  rW  rX  r    r`   rM   )rV  rW  rX  r    r`   ra  )rV  rW  rX  r    r`   re  )ro  r   r`   rp  )ro  r   r`   r   )r  r  r  rp   r  r  r`   ra   )r`   r  )
r  r  r  r  r  r   r  r  r`   r  )r  rM  r   r  r  rE   r`   ra   )r(  rE   r   r  r  r  r`   ra   )r,  r  r`   r  )r  rp   r`   ra   )r   rp   r`   r0  )r	  r  r  rp   r  r  r`   ra   )r   rp   r^   r   r`   r  )
r  rp   r  r3   r  rp   r  rp   r`   ra   )r  rp   r  r  r`   ra   )rh  rp   )r`   r  r  rc   )
r  r  r  rp   r  rp   r  r  r`   rp   )
__future__r   r  r  r^  r   r   r  r}  r   loggingr   rZ  r  rr  r  r  rs  r  r~   r  r  r  rD  rj  bisectr   r   rS  r   r   r   datetimer	   r
   pathlibr   r   r   typesr   typingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   rz   torch.distributedr0  r6  r   r    torch._dynamo.utilsr!   r"   r#   torch._inductorr$   r%   r&   torch._inductor.codegen.cudar'   ,torch._inductor.codegen.rocm.compile_commandr(   r)   torch._utils_internalr*   r  r,   r-   collections.abcr.   rR  r/   r0   torch._inductor.cpp_builderr1   r2   r3   r4   r5   r6   r7   r8   r9   torch._inductor.cpu_vec_isar:   torch._inductor.cudagraph_utilsr;   r<   r=   %torch._inductor.runtime.compile_tasksr>   r?   r@   %torch._inductor.runtime.runtime_utilsrA   rB   torch._inductor.utilsrC   rD   rE   rF   rG   rH   rI   torch._loggingrJ   torch._subclasses.fake_tensorrK   rL   rM   %torch.fx.experimental.symbolic_shapesrN   rO   rP   concurrent.futuresrQ   torch._inductor.graphrS   torch._inductor.irrT   torch._inductor.runtime.hintsrU   rV   r   ra  r  _HEREr  r  r   r  rd  _IS_WINDOWSr  	triton.fbrX   triton.fb.buildrY   torch._inductor.fb.utilsrZ   r[   r\   r]   r  getArtifactLoggerr   r  rh  	getLoggerr  r   r   r   r   r   r  r  r  r'  r/  r4  r9  r   rM  rK  rU  r`  rd  rn  rs  ru  Picklerrw  r  r   r  r  r  r[  rh  r  r  r'  r+  r;  r=  rp   r  rQ  r  r  r  r  r  rq  r  r  r  r  r  r  r  r  r   r#  r~  r}  rt  r  r  r  r  r  r  r  r
  r,  rO  rW  rZ  r`  rd   rh   rf   <module>r     s   " " " " " " "                				   				   				        



                        ' ' ' ' ' ' ' ' ' '                                                                 " ( ' ' ' ' '                              Q Q Q Q Q Q Q Q Q Q 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1        3 2 2 2 2 2       GCLL  6((((((55555555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 5 4 4 4 4 4         
         
 O N N N N N N N                  , + + + + +         
 O N N N N N N N N N  J))))))333333//////IIIIIIII 	!!goobgooe4455k+@AAlg%6 %%%%%%222222                         .228]KKlg% g!!' ' ' 'G G G GG
 G
 G
 G
 G
 G
 G
 G
T' ' ' ' ' ' ' '2X X X X Xi X X Xv   X X X X
* * * * * 9;" " " " " CI; ; ; ; ;     $! ! ! ! 	    *              2! ! ! !B B B BB   4 4 4 4O O O O O&. O O OdV V V V  TT T T TD% % % %               :5 :5 :5 :5 :5 :5 :5 :5z   &W W W Wt; ; ; ;(! ! ! !8L L L L L L L L^       F- F- F- F- F- F- F- F-R8 8 8 8 T                   8b b b b b b b bZ 
. . .  .( ( ( (; ; ; ;/6 /6 /6 /6d      H H H HD u3 u3 u3 u3 u3 u3 u3 u3p$ $ $ $0 b; b; b; b; b; b; b; b;J ; ; ; ; ;4 ; ; ;| [ [ [ [ [0 [ [ [|
# # # #L        K( K( K( K( K( K( K( K(\T T T T T T T T	 	 	 	       .   ' ' ' '\ '+	" " " " "JH H H H H H H HV QG QG QG QG QG QG QG QGh ZG ZG ZG ZG ZG ZG ZG ZGz" " " " " " " "
    ?   *         ?          rh   