
    קgcN                        d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
mZmZmZ d dlmZmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ erd dl0m1Z1  ee2d          Z3 ee2d          Z4d Z5de6ddfdZ7d Z8d Z9 G d d          Z:da;da<e j=        d             Z>e j=        d             Z?ddZ@dS )    N)AnyDictListOptionalTYPE_CHECKINGUnion)call_backward	call_hookFakeCompiledAutogradEngineGetItemSourceLocalSource)counterslazy_format_graph_codeset_locals_to_steal)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)CapturedTraceback)Proxycompiled_autogradcompiled_autograd_verbosec                  T    t           j        j        j                            d          S )Nr'   )torch_logging	_internal	log_stateis_artifact_enabled     [/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabledr1   )   s%    >#-AA#  r/   msgreturnc                 :    t                               |            d S N)verbose_logdebug)r2   s    r0   cpp_verbose_log_fnr8   /   s    cr/   c                  8    t           j        j        j        j        S r5   )r)   	_inductorconfigtriton
cudagraphsr.   r/   r0   snapshot_cudagraph_enabledr>   3   s    ?!(33r/   c                 (    | t          |           S | S r5   r   )xs    r0   maybe_clonerA   7   s    }%a(((Hr/   c            	       0   e Zd ZddZd Zedefd            Zdee	j
                 dee         deeeef                  fd	Zd
efdZd ZdefdZd Zd Zd Zdee         fdZd Zd Zd Zd Zd ZdefdZdededee	j        j                 fdZ dS )AutogradCompilerInstancer3   Nc                 >   || _         t          j                    | _        | j        j        | _        t                      | _        t          dd| j                  | _        t                      | _
        t          | j
        d          | _        d | _        g d| _        d S )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)inputssizesscalarshooks)compiler_fn
contextlib	ExitStackstackcloser!   rG   r   fake_tensor_moder   	fx_tracerr   
proxy_modehooks_proxygraph_placeholders)selfrM   s     r0   __init__z!AutogradCompilerInstance.__init__>   s    &)++
Z%
! .#'"&n!
 !
 !

 )**0LL,0"I"I"Ir/   c                 r    t          |t          j                  sJ | j                            ||          S )N)source)
isinstancer)   TensorrR   from_tensor)rW   r@   rZ   s      r0   	wrap_fakez"AutogradCompilerInstance.wrap_fakeM   s6    !U\*****$0060BBBr/   c                 <    t          t          |           |          S r5   r   )nameidxs     r0   rZ   zAutogradCompilerInstance.sourceQ   s    [..444r/   rI   rJ   rK   c                 t    t           d         dxx         dz  cc<   d  _        i  _        t          j                                         j        _        t          j        	                    t                     j        _        i  j        _         fd j        D             \  }}} _         fdt          |          D             }                     ||            fdt          |          D             }                     ||           t          |          D ]\  }}                     d|          }	t%          |t&                    r* j                            ||	t,          j                  ||<   Zt%          |t0                    rF j                             j                            ||	t,          j        	          ||	
          ||<   t7          dt9          |                                         ||            j                            t?          i                       j                             j                     j                             j!                    j                            tE                                  j                            tG                                 |||fS )Nr&   captures   )
tracer_clsc              3   R   K   | ]!}j                             d |di           V  "dS )placeholderr.   N)rS   create_proxy).0r`   rW   s     r0   	<genexpr>z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>a   sU       D
 D
 N''tRDDD
 D
 D
 D
 D
 D
r/   c           	      j    g | ]/\  }}                     |                    d |                    0S )rI   )r^   rZ   )ri   ra   r@   rW   s      r0   
<listcomp>z:AutogradCompilerInstance.begin_capture.<locals>.<listcomp>g   sI     
 
 
Q NN1dkk(C8899
 
 
r/   c           	          g | ]?\  }}j                             |                    d |          t          j                  @S )rJ   )rG   $create_unspecified_symint_and_symbolrZ   r    DYNAMIC)ri   ra   valrW   s      r0   rl   z:AutogradCompilerInstance.begin_capture.<locals>.<listcomp>n   s[     
 
 
 S N??GS))" 
 
 
r/   rK   )rZ   dynamic_dim)hintrZ   zUnexpected scalar type: )$r   aot_graph_cls_nameaot_graph_infosr)   nnModulerS   rootfxGraphr   graphtensor_attrsrV   rU   	enumeratebind_tensors_to_proxiesrZ   r[   intrG   rn   r    ro   floatcreate_symfloatnodecreate_unspecified_symbolAssertionErrortyperP   enter_contextr   rR   rT   r   r"   )
rW   rI   rJ   rK   
args_proxysizes_proxyscalars_proxyra   rp   rZ   s
   `         r0   begin_capturez&AutogradCompilerInstance.begin_captureU   s    	$%j111Q611115:<#hoo//$x~~~II&(#D
 D
 D
 D
/D
 D
 D
@
K0@
 
 
 
#F++
 
 
 	$$VZ888
 
 
 
 &e,,
 
 
 	$$UK888!'** 	L 	LHC[[C00F#s## L#~RR&   
 C'' L#~AAN<<%$.$6 =  
 !  B     %%?cKKK$$Wm<<< 	
  2///
  !6777
  111
  !7!9!9:::
  !3!5!5666ug%%r/   backward_idxc           
         | j         J | j         |         }| j                            dt          ||                     |          g|                     |          R i           }t                      5  g }|D ]L}||                    d            |\  }	}
}}|                    t          j        |||	|
                     M| 	                    ||           d d d            n# 1 swxY w Y   t          |          S )Ncall_function)kindtargetargskwargs)sizedtypelayoutdevice)rU   rS   rh   r	   to_proxyr   appendr)   emptyr}   tuple)rW   rI   output_metadatassaved_tensorsr   backward_c_functionproxiesgrad_insoutput_metadatar   r   r   r   s                r0   proxy_call_backwardz,AutogradCompilerInstance.proxy_call_backward   sq    +++".|<.--  #m,, v&& 
  . 	
 	
 )** 	< 	<57H#3  "*OOD))).=+tKTvfUUU    ((7;;;	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< Xs   3A(C''C+.C+c                 h      j                             dt          |g fd|D             R |          S )Nr   c                 :    g | ]}                     |          S r.   r   ri   r@   rW   s     r0   rl   z<AutogradCompilerInstance.proxy_call_hook.<locals>.<listcomp>   s%    111q$--""111r/   )rS   rh   r
   )rW   hookr   r   s   `   r0   proxy_call_hookz(AutogradCompilerInstance.proxy_call_hook   sQ    ~**1111D111  
 
 	
r/   ic                 &   | j         J | j         |         }|                     |||         d          }t                      5  t          ||                   ||<   |                     ||         g|g           d d d            n# 1 swxY w Y   |S )Ntensor_pre_hook	hook_type)rU   r   r   rA   r}   )rW   rI   hook_idr   r   proxys         r0   r   z(AutogradCompilerInstance.tensor_pre_hook   s    +++($$1I' % 
 

 )** 	? 	?#F1I..F1I((&)ug>>>	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? s   7BB
B
c                     | j         J | j         |         }|                     ||d          }t                      5  d |D             }|                     ||           d d d            n# 1 swxY w Y   |S )Npre_hookr   c                 ,    g | ]}t          |          S r.   rA   ri   r@   s     r0   rl   z5AutogradCompilerInstance.pre_hook.<locals>.<listcomp>   s    555k!nn555r/   rU   r   r   r}   )rW   rI   r   r   r   s        r0   r   z!AutogradCompilerInstance.pre_hook   s    +++(&&  ' 
 

 )** 	: 	:55f555F((999	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: s   #A,,A03A0c                     | j         J | j         |         }|                     |||d          }t                      5  d |D             }|                     ||           d d d            n# 1 swxY w Y   |S )N	post_hookr   c                 ,    g | ]}t          |          S r.   r   r   s     r0   rl   z6AutogradCompilerInstance.post_hook.<locals>.<listcomp>   s    777!{1~~777r/   r   )rW   outputsrI   r   r   r   s         r0   r   z"AutogradCompilerInstance.post_hook   s    +++(&&!	 ' 
 
 )** 	; 	;77w777G((':::	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; s   #A--A14A1c                 4   t          |t          j                  sJ | j        J | j        |         }|                     ||d          }t                      5  t          |          g}|                     ||g           d d d            n# 1 swxY w Y   |S )Npost_acc_grad_hookr   )r[   r)   r\   rU   r   r   rA   r}   )rW   inputr   r   r   s        r0   r   z+AutogradCompilerInstance.post_acc_grad_hook   s    %.....+++($$* % 
 

 )** 	9 	9 ''(E((888	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 s   (BBBc                    i }d}t          |j                  }|d         j        dk    sJ |d         }t          |j                                                  }t          | j                  }||         |d         k    sJ |t          |          z   dz
  }||         |d         k    sJ t          |          D ]\  }	}
|s|
j        d         j	        j
        dk    rd}%|
j        d         j	        j
        d	k    }t          |
j        d                                                   dk    }|rF|rDt          |
j                                                  }t          d
 |D                       r|
||	<   |r_|                                D ])}
|
j        d                                         |
j        d<   *t          |                                          S g S )NFr   rI   rd   rp   cudaTcpuc              3   z   K   | ]6}t          |j        t          j        j                  o|j        j        d v V  7dS ))primsatenN)r[   r   r)   _ops
OpOverload	namespace)ri   users     r0   rj   zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>  s]          t{EJ,ABB C-1BB     r/   )listnodesr   userskeyslenrV   r|   metar   r   r   allvaluesr   )rW   rz   to_movehas_cuda_inputsr   rI   inputs_usersfirst_getitem_idxlast_getitem_idxr   nodeis_cpu	is_scalar
node_userss                 r0   move_graph_nodes_to_cudaz1AutogradCompilerInstance.move_graph_nodes_to_cuda   s   ,.U[!!Qx(****qFL--//00 788&'<?::::,s</@/@@1D%&,r*::::: .. 	& 	&GAt" ty'7'>'Cv'M'M"&Yu%,1U:FDIe,1133449I &) &!$*//"3"344
   !+     & "&GAJ  	((( ; ;#'9U#3#8#8#:#:	%   '''	r/   c           	      &   | j                             dt          j        di            | j                                         | j                             dd| j                             |                     |                    fi            | 	                                 | 
                                 g t                      r|                     | j         j                  t          | j         j        | j         j        d          t!          dg           t#          dddd          }t$                              d	|           t(                              d	|           t-          d
fd           fd}||                               fS )Nr   r.   outputCompiledAutogradrI   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                  0                          d          S )NF)print_output)print_readable)rz   s   r0   <lambda>z6AutogradCompilerInstance.end_capture.<locals>.<lambda>J  s    u333GG r/   )
payload_fnc                     	 da D ]3}||                                                             d          ||<   4 | ||||          da S # da w xY w)NT)non_blockingF)in_compiled_autograd_region
pin_memoryr   )compiled_fnrI   rJ   rK   rL   r   runtime_inputs_to_moves         r0   runtime_wrapperz=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapperM  sy    4.2+/ O OA &q	 4 4 6 6 ; ; ; N NF1II"{65'5AA.3++e+3333s   AA A)rS   rh   r   _exec_final_callbacks_stubrP   rQ   create_node
create_argr   rename_aot_dispatcher_nodesreorder_accumulate_grad_nodesr>   r   rz   r   rw   r   r   compiled_autograd_loginfor6   r7   r   rM   )rW   r   lazy_graph_coder   rz   r   s       @@r0   end_capturez$AutogradCompilerInstance.end_capture'  s   ##&A		
 	
 	
 	
""^&&t}}W'='=>>@		
 	
 	
 	((*****,,,,.%'' 	Y%)%B%B4>CW%X%X"N!57I
 
 	EH:...0%
 
 
 	""4999$000%GGGG	
 	
 	
 	

		4 		4 		4 		4 		4  0 0 7 777r/   c           	         | j         dS dt          j        j        j        dt          j        j        j        fd}| j                                        D ]\  }}|d         }|d         }|d         j        }t          |j	                  }t          |          }|J 	 |j        dk    rt          |          }|j        dk    n# t          $ r Y }w xY w	 t          | j        j        j	                  }	t          |          D ]}
t          |	           t          |	          }|j        d	k    r2 |||          s&t          |	          }|j        d	k    r |||          &|j        d	k    r|j        d	k    r|j        st          |	          }- |||          st          d
| d|j         |_        t#          |j                  D ]"\  }}d
| d|j         |j        |         _        #t          |          }t          |	          }|j        d	k    r|j        d	k    # t          $ r' t&                              d| j         |||           Y w xY wdS )z
        Renames nodes as they appear in the AOTDispatcher backward graphs, prefixed by AOT id
        e.g. AOTDispatcher backward graph X's `sin_Y` -> `aotX_sin_Y`
        Nabc                 F   | j         |j         k    }|sDt          | j         d          o.t          |j         d          o| j         j        |j         j        k    }|oI| j        |j        k    o9| j        |j        k    o)t          | j                  t          |j                  k    S )N__name__)r   hasattrr   opr   r   all_input_nodes)r   r   target_matchs      r0   
is_similarzHAutogradCompilerInstance.rename_aot_dispatcher_nodes.<locals>.is_similarb  s    8qx/L AHj11 ?*55?)QX->>   EDADLEFaf$E )**c!2C.D.DD	r/   ca_node_start_idxaot_idaot_gmr   r   aot_zIFailed to match %s%s (NodeCall %s) nodes with AOT backward graph %s nodes)rs   r)   rx   r   Nodert   itemsrz   iterr   nextr   StopIterationrS   ranger   r`   r|   r   r6   r7   )rW   r   nodecall_indexr   r   r   	aot_graphaot_itaot_nodeca_itr   ca_noder   inps                 r0   r   z4AutogradCompilerInstance.rename_aot_dispatcher_nodesZ  s   
 "*F	%(-, 	1C 	 	 	 	 %)$8$>$>$@$@ 5	 5	 ND $%8 9(^FX,I )/**FF||H'''k_44#F||H k_44    &T^1788011    AKKKKu++ jH,,ZZ5R5R, #5kkG jH,,ZZ5R5R, kX--'*2H2H"= !"&u++ %:h88 , ,+#A#A#A(-#A#AGL"+H,D"E"E T T3:S:S:S:S:S/277#F||H"5kkG! kX--'*2H2H" !   !!_+"    ]5	 5	s%   %C
CCEH-IIc                     | j         j                            dt          j        j        j        j                  D ]?}t          |j	                  }||j
        ur |j        dk    r|                    |           @dS )a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        r   )r   r   rg   N)rS   rz   
find_nodesr)   opsinductoraccumulate_grad_defaultmaxr   prevr   r   )rW   r   args      r0   r   z6AutogradCompilerInstance.reorder_accumulate_grad_nodes  s     N(33uy'9'J'R 4 
 
 	! 	!D di..C$)##-(?(?

4   	! 	!r/   c                 x    |d S t          |t                    r fd|D             S t          |t                    rt           fd|D                       S t          |t          j                  sJ t           j        |          }t          |t          j        j        j	        j
                  sJ |j        S )Nc                 :    g | ]}                     |          S r.   r   r   s     r0   rl   z5AutogradCompilerInstance.to_proxy.<locals>.<listcomp>  s%    000DMM!$$000r/   c              3   B   K   | ]}                     |          V  d S r5   r   r   s     r0   rj   z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>  s/      55aq))555555r/   )r[   r   r   r)   r\   r   rS   rx   experimentalproxy_tensor_ProxyTensorr   )rW   tr  s   `  r0   r   z!AutogradCompilerInstance.to_proxy  s    94a 	10000a0000a 	655551555555!U\*****)$.!<<,(=(J(WXXXXX!!r/   c                 
   t          t          j        j                  r(fdt	          t          |                    D             t          |          t                    k    sJ t          |d | j                   d S )Nc                      g | ]
}|         S r.   r.   )ri   r   r   s     r0   rl   zDAutogradCompilerInstance.bind_tensors_to_proxies.<locals>.<listcomp>  s    ???awqz???r/   constanttracer)r[   r)   rx   r%   r  r   r   rS   )rW   tensorsr   s     `r0   r}   z0AutogradCompilerInstance.bind_tensors_to_proxies  s}    gux~.. 	@????5W+>+>???G7||s7||++++'7T$.QQQQQQr/   indexc                 ~    | j         J | j         |         }t                      }t          ||d | j                   |S )Nr   )rU   r   r   rS   )rW   r$  r   bw_states       r0   bind_backward_statez,AutogradCompilerInstance.bind_backward_state  sE    +++ ' ??(EDPPPPr/   	node_namer  pyobjc                 t   d}|Y|j         }t          |d          rB|| _        |j        }t	          | j        j        j                  ||j        j	        d| j
        |<   | | d| d}t          j                                                    d         }|                    d|          }t          |           d S )N _aot_id)r   r   r   z (NodeCall )r   z:raw_stack_trace = CapturedTraceback.extract().format()[-1])_forward_clsr   rs   r,  r   rS   rz   r   _lazy_backward_info	bw_modulert   r$   extractformatreplacer#   )	rW   r(  r  r)  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_traces	            r0   set_node_originz(AutogradCompilerInstance.set_node_origin  s     ,K{I.. *3'*2),T^-A-G)H)H*)=G8 8$^4  KKK.KKK+355<<>>rB)11H(
 
 	(((((r/   r3   N)!r   
__module____qualname__rX   r^   staticmethodr   rZ   r   r)   r\   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r'  strr   autogradFunctionr9  r.   r/   r0   rC   rC   =   s       J J J JC C C 5] 5 5 5 \5?&U\"?& Cy?& eCJ'(	?& ?& ?& ?&B!
 ! ! ! !F	
 	
 	
#          &&c & & & &P18 18 18fL L L\! ! !" " "R R R    )) ) /0	) ) ) ) ) )r/   rC   Fc              #   >  K   t           j        j        j                            t          j        t          |                     }t                      r.t           j        j        j        	                    t                     da	 t           j                            d          5  d V  d d d            n# 1 swxY w Y   |sdat           j        j        j                            |           d S # |sdat           j        j        j                            |           w xY w)NTF)r)   _C_dynamor&   set_autograd_compiler	functoolspartialrC   r1   set_verbose_loggerr8   compiled_autograd_enabledr?  set_multithreading_enabled)rM   priors     r0   enablerK    sG     H.DD2K@@ E ()) R*==>PQQQ $H^66u== 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  	.(-%*@@GGGGG  	.(-%*@@GGGGs0   C- "B3'C- 3B77C- :B7;C- -/Dc               #   (  K   t           j        j        j                            d           } da	 d V  | rdat           j        j        j                            |            d S # | rdat           j        j        j                            |            w xY w)NFT)r)   rB  rC  r&   rD  rH  )rJ  s    r0   disablerM    s      H.DDTJJE %H 	-(,%*@@GGGGG  	-(,%*@@GGGGs   A" "/Bc                      d} t           rJ t          j        j        j                            d            t          j        j        j                            d            d S )NF)r   r)   rB  rC  r&   rD  rG  )compiled_autograd_enables    r0   resetrP    sR    $****	H&<<TBBB	H&99$?????r/   r:  )ArN   rE  typingr   r   r   r   r   r   r)   torch._dynamo.external_utilsr	   r
   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   torch._loggingr   r   torch._prims_commonr   torch._subclassesr   torch.fxr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   r   r   r   r   r   %torch.fx.experimental.symbolic_shapesr    r!   torch.fx.tracebackr"   r#   torch.utils._tracebackr$   torch.fx.proxyr%   r   r   r6   r1   r>  r8   r>   rA   rC   rH  r   contextmanagerrK  rM  rP  r.   r/   r0   <module>r`     s           B B B B B B B B B B B B B B B B          
 < ; ; ; ; ; ; ; U U U U U U U U U U > > > > > > > > 6 6 6 6 6 6 , , , , , ,             ? ? ? ? ? ?                  G F F F F F F F B B B B B B B B 4 4 4 4 4 4  %$$$$$$ *)(4GHH *EFF  C D    4 4 4  j) j) j) j) j) j) j) j)\ "  $  H H H" 	H 	H 	H@ @ @ @ @ @r/   