
    קg                         d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d
dlmZ d Zdej         j!        fdZ"dej         j!        dee#         fdZ$dej         j!        dee#         fdZ%de&fdZ'deee#                  fdZ(d Z) G d d          Z* ed e*                       ddZ+dS )    N)defaultdict)DictListOptional)config)aot_autograd)	boxed_nop)BoxedDeviceIndex'check_multiple_devices_or_any_cpu_nodesformat_default_skip_messageget_mutation_stack_traceget_placeholder_info#log_cudagraph_skip_and_bump_counter)	BoxedBoolcount_tangents%get_first_incompatible_cudagraph_nodenum_fw_fixed_argumentsoutput_node)StorageWeakRef   )register_backendc           	      ,   d }t          t                    }d}t                      }| j        D ]c}|j        dk    rvt	           ||j                  t          j                  rH|t           ||j                  	                                                   
                    |           |dz  }|j        dk    rt          |j        d          s|j        j        }t          |j                  D ]\  }}|t!          |j                  k     r|j        |         }	n!|j        |j        vr:|j        |j                 }	d}
|j        r|j        j        rd}
|
r8||t           ||	j                  	                                                   z  }e|S )	Nc                 *    d| v r| d         n| d         S )Nvalfake_result )metas    ]/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_dynamo/backends/cudagraphs.pymeta_fkz%find_input_mutations.<locals>.meta_fk    s    #tmmtE{{m1DD    r   placeholderr   call_function_schemaFT)r   setnodesop
isinstancer   torchTensorr   _typed_storageaddhasattrtargetr#   	enumerate	argumentslenargsnamekwargs
alias_infois_write)gr   inputs	input_idxmutated_inputsnschemaiargargumentmut_args              r   find_input_mutationsr@      s   E E E FIUUNW  4=  ''!&//5<88 X~ggafoo&D&D&F&FGGHLLYWWWNIIT_$$18Y// X%F#F$455  3s16{{?? vayHHxqx//  x1H> '~. '"&  #f&wwx}'='='L'L'N'NOO' N
 r    gmc                     i }| j         j        D ]J}|j                            dd           }t	          |t
          j                  r|j        |vr
|||j        <   K|S )Nr   )graphr%   r   getr'   r(   r)   device)rA   device_node_mappingr:   ts       r   get_device_node_mappingrH   F   se    =?X^ . .FJJud##a&& 	.18;N+N+N,-)r    	aot_modelreturnc                     t          | j                  t          t          |                    z
  }|sd S t	          | j                  }t          ||          S N)r@   rC   r$   ranger   r   )rI   	num_fixedmutation_indicesplaceholderss       r   3check_for_mutation_ignore_cuda_graph_managed_tensorrQ   O   sV     ,IO<<s5CSCS?T?TT t'	88L#L2BCCCr    c                     t           j        st          | |          x}r|S t          t	          |                     x}r|S t          |           x}rt          d|j         d          S d S )Nzincompatible op ())r   (cudagraph_backend_support_input_mutationrQ   r   rH   r   r   r2   )rI   rN   mut_skipskipnodes        r   check_for_skiprX   Z   s    : Jy
 
 
8 	 O6	**  t  4Y???t M*+Kty+K+K+KLLL4r    c                 |    t          t          t          |                               }|j        dk    sJ |j        S )Ncuda)nextiterrH   typeindex)rA   rE   s     r   get_device_indexr_   l   s;    $.r223344F;&    <r    c                     t          |           }t          |j                  dk    sJ d |j        d         D             S )Nr   c                 h    g | ]/}t          |t          j        j        j                  r|j        nd 0S rL   )r'   r(   fxrW   Nodestack_trace).0r=   s     r   
<listcomp>z$get_stack_traces.<locals>.<listcomp>u   sD        'sEHM,>??	IT  r    r   )r   r0   r1   )rA   outputs     r   get_stack_tracesrh   r   sO    __Fv{q     ;q>   r    c                     ddl m t          d          t          d           d	fd	}fd}t	          ||t          j        |d          t          j        j	        j
                  } ||           S )
Nr   )cudagraphify_implTFc                    t          | |          }t          t          
          t          |                    }t          | |          x}r(t	          j        	           t          d|            |S                     t          |                       ||t          |          j
        ddt          |           t          | j                  t          | j                  	  	        }d|_        |S )Nzskipping cudagraphs due to Fdevice_indexis_backwardis_inferencestack_tracesrP   mutated_input_idxsT)r	   r   r0   rX   r   disabler   r$   r_   rM   valuerh   r   rC   r@   _boxed_call)rI   
aot_inputsro   interpfixedskip_msgoutboxed_device_indexrj   do_cudagraphsdynamo_inputss          r   forward_cudagraphsz&cudagraphs.<locals>.forward_cudagraphs   s    9j11&s='9'93z??KK%i7778 	m,,,/8h88   M/	::;;;%LL+1))44-io>>3IODD

 

 

 
r    c                     t           |          }
s S t                     }t           |          x}rNt          d|           t          j        j                            j        d          J  fd}d|_	        |S  	||t          |          t                     ddt                     t           j                  t           j                  	  	        }d|_	        |S )Nzskipping cudagraphs due to %sF)create_if_none_existsc                 B                                       |           S rL   )set_to_running_backward)r7   rI   managers    r   fnz3cudagraphs.<locals>.backward_cudagraphs.<locals>.fn   s%    //111 y(((r    Trl   )r	   r   rX   r   r(   	_inductorcudagraph_treesget_managerrs   rt   rM   r_   rh   r   rC   r@   )rI   ru   rv   rw   rx   r   ry   r   rz   rj   r{   s   `      @r   backward_cudagraphsz'cudagraphs.<locals>.backward_cudagraphs   s+   9j11 	y))%i7778 	//  
 o5AA"( B  G &&&) ) ) ) ) ) "BNI%LL))44))44-io>>3IODD

 

 

 
r    )ro   )fw_compilerbw_compilerinference_compilerkeep_inference_input_mutations)F)torch._inductor.cudagraph_treesrj   r   r
   r   	functoolspartialr(   _dynamor   %cudagraph_backend_keep_input_mutation)dynamo_modelr|   r}   r   aot_cudagraphsrz   rj   r{   s    `   @@@r   
cudagraphsr   {   s    AAAAAAdOOM)$//        2$ $ $ $ $ $ $L "&'$,-?dSSS',}';'a	  N >,666r    c                   >    e Zd ZdZed             Zed             ZdS )CudagraphsBackendr   c                  &    ddl m}   |              d S )Nr   reset_cudagraph_trees)r   r   r   s    r   resetzCudagraphsBackend.reset   s)    IIIIIIr    c                 "    t          | |          S rL   )r   )modelr7   s     r   __call__zCudagraphsBackend.__call__   s    %(((r    N)__name__
__module____qualname__compiler_namestaticmethodr   r   r   r    r   r   r      sM         M    \ 
 ) ) \) ) )r    r   r   )r2   compiler_fnTc                    t          |t          t          f          sJ rd |D             nt          |          t          j                                         t          j                                        }|                    t          j                                                   t          j        	                    |          5   | |  ddd           n# 1 swxY w Y   |                                 t          j                                                            |           t          j                                         t          j        
                                t          j                            |          5   |  ddd           n# 1 swxY w Y   t          t          t          f          sffd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 6    g | ]}t          j        |          S r   )r(   
zeros_likere   xs     r   rf   z$cudagraphs_inner.<locals>.<listcomp>   s#    ===)!,,===r    N)streamc                      t                    t          |           k    sJ r+t          |           D ]\  }}|                    |                                            rd D             S S )Nc                 6    g | ]}|                                 S r   )cloner   s     r   rf   z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>   s     666!AGGII666r    )r0   zipcopy_replay)
new_inputsdstsrccopy_inputscopy_outputsrC   static_inputsstatic_outputss      r   runzcudagraphs_inner.<locals>.run   s    =!!S__4444 	z::  S		# 	"66~6666!!r    )r'   listtupler(   rZ   synchronizeStreamwait_streamcurrent_streamr   	CUDAGraphrC   )	r   r7   r   r   r   r   rC   r   r   s	     ``  @@@r   cudagraphs_innerr      s5   ftUm,,,,, %==f===V 
JZ  F
uz0022333			6	"	"  v              
	J++F333	J J  ""E			%		/	/ / /./ / / / / / / / / / / / / / /ntUm44 +(*	" 	" 	" 	" 	" 	" 	" 	" 	" Js$   C  C$'C$FF#&F#)TT),r   collectionsr   typingr   r   r   r(   torch._dynamor   torch._dynamo.backends.commonr    torch._dynamo.backends.debuggingr	   torch._inductor.cudagraph_utilsr
   r   r   r   r   r   torch._inductor.utilsr   r   r   r   r    torch.multiprocessing.reductionsr   registryr   r@   rb   GraphModulerH   strrQ   rX   intr_   rh   r   r   r   r   r    r   <module>r      s       # # # # # # ' ' ' ' ' ' ' ' ' '              6 6 6 6 6 6 6 6 6 6 6 6                             < ; ; ; ; ; & & & & & &$ $ $N 4    Dx#Dc]D D D Deh2 (3-    $C    D#/    K7 K7 K7\) ) ) ) ) ) ) )   l0A0A0C0C D D D D$ $ $ $ $ $r    