
    קgV                     P   d dl Z d dlZd dlZddlmZ  eej        d          sH ed          ej        j        d<    ed          ej        j        d<    ed          ej        j        d<   d dlm	Z	m
Z
mZ d	 Zd
 Z G d dej        j
                  Z G d d          Z	 ddZdS )    N   )_dummy_type_CudaStreamBase
_CUDAGraph_graph_pool_handle_cuda_isCurrentStreamCapturing)r   r   r   c                      t                      S )zReturn True if CUDA graph capture is underway on the current CUDA stream, False otherwise.

    If a CUDA context does not exist on the current device, returns False without initializing the context.
    )r        M/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/cuda/graphs.pyis_current_stream_capturingr      s    
 *+++r   c                      t                      S )zReturn an opaque token representing the id of a graph memory pool.

    See :ref:`Graph memory management<graph-memory-management>`.

    .. warning::
        This API is in beta and may change in future releases.
    )r   r
   r   r   graph_pool_handler   "   s     r   c                   j     e Zd ZdZ fdZd fd	Z fdZ fdZ fdZ fd	Z	 fd
Z
 fdZ xZS )	CUDAGraphzrWrapper around a CUDA graph.

    .. warning::
        This API is in beta and may change in future releases.
    c                 F    t                                          |           S N)super__new__)cls	__class__s    r   r   zCUDAGraph.__new__5   s    wws###r   Nglobalc                 N    t                                          ||           dS )a  Begin capturing CUDA work on the current stream.

        Typically, you shouldn't call ``capture_begin`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_begin`` internally.

        Arguments:
            pool (optional): Token (returned by :func:`~torch.cuda.graph_pool_handle` or
                :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) that hints this graph may share memory
                with the indicated pool.  See :ref:`Graph memory management<graph-memory-management>`.
            capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
                Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
                may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
                actions in the current thread, and "relaxed" will not error on these actions. Do NOT change this setting
                unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_
        )poolcapture_error_modeN)r   capture_begin)selfr   r   r   s      r   r   zCUDAGraph.capture_begin8   s)    " 	4<NOOOOOr   c                 H    t                                                       dS )aG  End CUDA graph capture on the current stream.

        After ``capture_end``, ``replay`` may be called on this instance.

        Typically, you shouldn't call ``capture_end`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_end`` internally.
        N)r   capture_endr   r   s    r   r   zCUDAGraph.capture_endK   s!     	r   c                 H    t                                                       dS )z,Replay the CUDA work captured by this graph.N)r   replayr    s    r   r"   zCUDAGraph.replayV   s    r   c                 H    t                                                       dS )z1Delete the graph currently held by this instance.N)r   resetr    s    r   r$   zCUDAGraph.resetZ   s    r   c                 D    t                                                      S )zReturn an opaque token representing the id of this graph's memory pool.

        This id can optionally be passed to another graph's ``capture_begin``,
        which hints the other graph may share the same memory pool.
        )r   r   r    s    r   r   zCUDAGraph.pool^   s     ww||~~r   c                 D    t                                                      S )z/Enable debugging mode for CUDAGraph.debug_dump.)r   enable_debug_moder    s    r   r'   zCUDAGraph.enable_debug_modef   s    ww((***r   c                 F    t                                          |          S )z
        Arguments:
            debug_path (required): Path to dump the graph to.

        Calls a debugging function to dump the graph if the debugging is
        enabled via CUDAGraph.enable_debug_mode()
        )r   
debug_dump)r   
debug_pathr   s     r   r)   zCUDAGraph.debug_dumpj   s     ww!!*---r   )Nr   )__name__
__module____qualname____doc__r   r   r   r"   r$   r   r'   r)   __classcell__)r   s   @r   r   r   .   s         $ $ $ $ $P P P P P P&	 	 	 	 	            + + + + +. . . . . . . . .r   r   c                   X    e Zd ZU dZdZej        d         ed<   	 	 	 d
defdZ	d Z
d	 ZdS )grapha  Context-manager that captures CUDA work into a :class:`torch.cuda.CUDAGraph` object for later replay.

    See :ref:`CUDA Graphs <cuda-graph-semantics>` for a general introduction,
    detailed use, and constraints.

    Arguments:
        cuda_graph (torch.cuda.CUDAGraph): Graph object used for capture.
        pool (optional): Opaque token (returned by a call to :func:`~torch.cuda.graph_pool_handle()` or
            :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) hinting this graph's capture
            may share memory from the specified pool. See :ref:`Graph memory management<graph-memory-management>`.
        stream (torch.cuda.Stream, optional): If supplied, will be set as the current stream in the context.
            If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.
        capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
            Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
            may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
            actions in the current thread, and "relaxed" will not error on actions. Do NOT change this setting
            unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_

    .. note::
        For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
        used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

    .. warning::
        This API is in beta and may change in future releases.

    .. _cudaStreamCaptureMode:
        https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85
    Nztorch.cuda.Streamdefault_capture_streamr   r   c                 0   | j         j        (t          j                                        | j         _        |dn|f| _        ||n| j         j        | _        | j        J t          j                            | j                  | _        || _	        || _
        d S )Nr
   )r   r2   torchcudaStreamr   capture_streamstream
stream_ctx
cuda_graphr   )r   r:   r   r8   r   s        r   __init__zgraph.__init__   s     >0849J4E4E4G4GDN1,BBTG	(FFdn.S 	 "...*++D,?@@$"4r   c                    t           j                                         t          j                     t           j                                         | j                                          | j        j	        | j
        d| j        i d S )Nr   )r4   r5   synchronizegccollectempty_cacher9   	__enter__r:   r   r   r   )r   s    r   rA   zgraph.__enter__   s~    
   


    	!!###%%Y	
+/+B	
 	
 	
 	
 	
r   c                 p    | j                                          | j                            |||           d S r   )r:   r   r9   __exit__)r   exc_type	exc_value	tracebacks       r   rC   zgraph.__exit__   s6    ##%%%  9i@@@@@r   )NNr   )r+   r,   r-   r.   r2   typingOptional__annotations__strr;   rA   rC   r
   r   r   r1   r1   u   s          : DHFO,?@GGG
 "*5 5
  5 5 5 5,
 
 
A A A A Ar   r1      Fc                   () t          j                    r"t          j                    rt          d          d}t	          | t
                    sd}| f} |f}g (t          | |          D ]	\  }}t	          |t           j        j                  rt          |j
                  dk    r0t          |j                  dk    rt          |j                  dk    s
J d            t          d |                                D                       s
J d            t          j        j        j        | }(                    t          |                     t          d |D                       s
J d	            d
 (D             }	d | D             )()fdt'          t          |                     D             }
d t'          t          |                     D             }d t'          t          |                     D             }|t)                      n|}t           j                                         t           j                            t           j                                                  5  t          | ||
          D ]\  }}}d\  }}}t'          |          D ]}t           j        j                             ||           }t          d |D                       }t          |          dk    rRt           j                            |t          d |D                       t          d |D                       d|          }|||fD ]}~	 ddd           n# 1 swxY w Y   t           j                                         g }g }t          | ||          D ]\  }}}t           j                            ||          5   || }ddd           n# 1 swxY w Y   t           j        j                            |          \  }}|                    t          |                     |                    |           g }g }t          t=          |
          t=          |          t=          |          t=          )                    D ][\  }}}}t          d |D                       }t          d |D                       }d}t          |          dk    rt           j                            ||          5  t           j                            |t          d |D                       t          d |D                       d|          }ddd           n# 1 swxY w Y   g } d}!|D ]A}"|"j        r#|!|                     ||!                    |!dz  }!,|                     d           Bt          |           } |                    |           |                    |            ]|                                  |                                  d }#g }$tC          |           D ]\  }%} |#||%         ||%         )|%         |	|%         ||%         |
|%         ||%         ||%         ||%         	  	        }&t	          |t           j        j                  r6d }' |'||j"        |&|j#                  |_#        |$                    |           |$                    |&           |r|$d         S t          |$          S )a  Accept callables (functions or :class:`nn.Module<torch.nn.Module>`\ s) and returns graphed versions.

    Each graphed callable's forward pass runs its source callable's
    forward CUDA work as a CUDA graph inside a single autograd node.

    The graphed callable's forward pass also appends
    a backward node to the autograd graph. During backward, this node runs the
    callable's backward work as a CUDA graph.

    Therefore, each graphed callable should be a drop-in replacement for its source callable
    in an autograd-enabled training loop.

    See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

    If you pass a tuple of several callables, their captures will use the same memory pool.
    See :ref:`Graph memory management<graph-memory-management>` for when this is appropriate.

    Arguments:
        callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
            See :ref:`Graph memory management<graph-memory-management>` for when passing a tuple of callables
            is appropriate.  If you pass a tuple of callables, their order in the tuple must be the same order
            they'll run in the live workload.
        sample_args (tuple of Tensors, or tuple of tuples of Tensors): Samples args for each callable.
            If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
            If a tuple of callables was passed, ``sample_args`` must be tuple of tuples of argument Tensors.
        num_warmup_iters (int): The number of warmup iterations. Currently, ``DataDistributedParallel`` needs
            11 iterations for warm up. Default: ``3``.
        allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
            (and therefore their grad is always zero) is an error. Defaults to False.
        pool (optional): Token (returned by :func:`~torch.cuda.graph_pool_handle` or
            :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) that hints this graph may share memory
            with the indicated pool.  See :ref:`Graph memory management<graph-memory-management>`.
    .. note::
        The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
        that's expected for the corresponding real input in the training loop.

    .. warning::
        This API is in beta and may change in future releases.

    .. warning::
        ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

    .. warning::
        Returned callables do not support higher order differentiation (e.g., double backward).

    .. warning::
        In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
        may be trainable. Buffers must have ``requires_grad=False``.

    .. warning::
        After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
        you may not add or remove any of that Module's parameters or buffers.

    .. warning::
        :class:`torch.nn.Module`\s passed to :func:`~torch.cuda.make_graphed_callables` must not have module hooks
        registered on them at the time they are passed. However, registering hooks on modules *after* passing them
        through :func:`~torch.cuda.make_graphed_callables` is allowed.

    .. warning::
        When running a graphed callable, you must pass its arguments in the same order and format
        they appeared in that callable's ``sample_args``.

    .. warning::
        The automatic mixed precision is supported in :func:`~torch.cuda.make_graphed_callables` only with disabled
        caching. The context manager `torch.cuda.amp.autocast()` must have `cache_enabled=False`.
    z_make_graphed_callables does not support the autocast caching. Please set `cache_enabled=False`.FTr   zModules must not have hooks registered at the time they are passed. However, registering hooks on modules after passing them through make_graphed_callables is allowed.c              3   (   K   | ]}|j         d u V  dS )FNrequires_grad.0bs     r   	<genexpr>z)make_graphed_callables.<locals>.<genexpr>  s)      EEAq%/EEEEEEr   zIn any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters may be trainable. All buffers must have ``requires_grad=False``.c              3   J   K   | ]}t          |t          j                  V  d S r   )
isinstancer4   Tensor)rQ   args     r   rS   z)make_graphed_callables.<locals>.<genexpr>#  s.      HHS:c5<00HHHHHHr   zfIn the beta API, sample_args for each callable must contain only Tensors. Other types are not allowed.c                 ,    g | ]}t          |          S r
   )len)rQ   argss     r   
<listcomp>z*make_graphed_callables.<locals>.<listcomp>*  s    !L!L!L#d))!L!L!Lr   c                     g | ]D}t          |t          j        j                  r!t	          |                                          nd ES )r
   )rU   r4   nnModuletuple
parameters)rQ   cs     r   r[   z*make_graphed_callables.<locals>.<listcomp>+  sP     " " " ",Aux!?!?GallnnR" " "r   c                 2    g | ]}|         |         z   S r
   r
   )rQ   iflatten_sample_argsper_callable_module_paramss     r   r[   z*make_graphed_callables.<locals>.<listcomp>/  s9     * * * 	A!;A!>>* * *r   c                 J    g | ] }t           j                                        !S r
   r4   r5   r   rQ   _s     r   r[   z*make_graphed_callables.<locals>.<listcomp>4  &    HHHQ%*&&((HHHr   c                 J    g | ] }t           j                                        !S r
   rg   rh   s     r   r[   z*make_graphed_callables.<locals>.<listcomp>5  rj   r   N)NNNc              3   (   K   | ]}|j         	|V  d S r   rN   rQ   os     r   rS   z)make_graphed_callables.<locals>.<genexpr>D  s)      $K$K11?$KQ$K$K$K$K$K$Kr   c              3   (   K   | ]}|j         	|V  d S r   rN   rQ   rc   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>H  s=       % %"#q%% % % % % %r   c              3   L   K   | ]}|j         	t          j        |          V   d S r   rO   r4   
empty_likerm   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>K  sH       + +45AO+!,Q//+ + + + + +r   )outputsinputsgrad_outputsonly_inputsallow_unused)r   c              3   P   K   | ]!}|j         rt          j        |          nd V  "d S r   rr   rm   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>p  sJ       $
 $
AB1?<EQ$
 $
 $
 $
 $
 $
r   c              3   (   K   | ]}|j         	|V  d S r   rN   rm   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>t  s)      JJ1!/JQJJJJJJr   c              3   (   K   | ]}|j         	|V  d S r   rN   rp   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>z  s)       T TqAO T T T T T T Tr   c              3      K   | ]}||V  	d S r   r
   rm   s     r   rS   z)make_graphed_callables.<locals>.<genexpr>{  s"      &W&WQq&W&Wr      c	           	      r    
  G  fddt           j        j                  

fd}	|	S )Nc                   |    e Zd Zefd            Zeej        j        j         fd                        Z	dS )Omake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphedc                 ^   t                    D ]Y}|                                         ||                                         k    r!|                             ||                    Z                                 t	          t
                    sJ t          d D                       S )Nc              3   >   K   | ]}|                                 V  d S r   detachrm   s     r   rS   zjmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward.<locals>.<genexpr>  s*      @@AQXXZZ@@@@@@r   )rangedata_ptrcopy_r"   rU   r_   )ctxru   rc   	fwd_graphlen_user_argsstatic_input_surfacestatic_outputss      r   forwardzWmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward  s     }-- A AA+A.7799VAY=O=O=Q=QQQ,Q/55fQi@@@  """!.%88888@@@@@@@@r   c                 ~   t          |          t                    k    sJ t          |          D ]F\  }}|?|                                |                                k    r|                    |           G                                 t          t                    sJ t          d D                       S )Nc              3   F   K   | ]}||                                 n|V  d S r   r   rP   s     r   rS   zkmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward.<locals>.<genexpr>  sC        ;<!-AHHJJJQ     r   )rY   zipr   r   r"   rU   r_   )r   gradsggrad	bwd_graphstatic_grad_inputsstatic_grad_outputss       r   backwardzXmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward  s     5zzS)<%=%====="#6>> * *GAt} ::<<4==??::GGDMMM  """ ""4e<<<<<  @R     r   N)
r+   r,   r-   staticmethodr   r4   autogradfunctiononce_differentiabler   )r   r   r   r   r   r   r   s   r   Graphedr     s        A A A A A A A \A ^$8      98 \  r   r   c                      t          j        j        j        |  } j        t          |          z    }t           j        j                            |          S r   )r4   utils_pytreearg_tree_leavesapplyr_   tree_unflatten)	user_argsflatten_user_argsoutr   module_paramsoutput_unflatten_specs      r   functionalizedzVmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.functionalized  sP     !& 3 CY O'-%(9":":]"JLC;&55c;PQQQr   )r4   r   Function)r   r   r   r   r   r   r   r   r   r   r   s   ````````` @r   make_graphed_autograd_functionz>make_graphed_callables.<locals>.make_graphed_autograd_function  s    	 	 	 	 	 	 	 	 	 	 	 	 	en- 	 	 	:	R 	R 	R 	R 	R 	R 	R r   c                       fd}|S )Nc                  .    j         k    r |  S  |  S r   )training)r   funcgraph_training_stategraphedorig_fwds    r   new_fwdzEmake_graphed_callables.<locals>.make_graphed_forward.<locals>.new_fwd  s-     }(<<<&w	22'x33r   r
   )r   r   r   r   r   s   ```` r   make_graphed_forwardz4make_graphed_callables.<locals>.make_graphed_forward  s5    4 4 4 4 4 4 4 4 r   )$r4   is_autocast_enabledis_autocast_cache_enabledRuntimeErrorrU   r_   r   r]   r^   rY   _backward_hooks_forward_hooks_forward_pre_hooksallbuffersr   r   r   appendr   r   r5   r=   r8   r6   tree_leavesr   r   r1   tree_flattenreversedrO   reverse	enumerater   r   )*	callablessample_argsnum_warmup_itersallow_unused_inputr   just_one_callablera   rZ   flatten_argper_callable_len_user_args"per_callable_static_input_surfaces
fwd_graphs
bwd_graphsmempoolr   r   grad_inputsrt   outputs_gradri   vper_callable_static_outputs"per_callable_output_unflatten_specr   flatten_outputsspec per_callable_static_grad_outputsper_callable_static_grad_inputsr   r   r   r   r   grad_idxrW   r   retrc   r   r   rd   re   s*                                           @@r   make_graphed_callablesr      s   J  "" 
u'F'H'H 
m
 
 	
 i'' % L	"ny+.. 
 
4a)) 	A%&&!++())Q..,--222] 322
 EEEEEEE  -  
 k)94@""5#5#5666HHKHHHHH 	
 	
Z	
 	
 	
 	
 "M!L8K!L!L!L" "" " "* * * * *s9~~&&* * *&
 IH%I2G2GHHHJHH%I2G2GHHHJ%)\!!!tG
 
J			5:,,..	/	/  03{$F1
 1
 	 	,D$, 2B.K,+,,  +-99$$+FF$$K$K$K$K$KKK|$$q(("'."5"5 ,$ % %';% % %     &+ + +9@+ + + & & %)%7 #6 
# 
#K |[9  A'	              . 
J #%)+&!$YZ!H!H 8 8dIZig66 	" 	"dDkG	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" !& 3 @ @ I I#**5+A+ABBB*11$7777 (*$&(#JM344,--+,,	K K &C &CFni $ $
 $
FT$
 $
 $
 
 
 JJJJJJJ|q  !!)'!::  #n11(  T T,@ T T TTT!&&W&W2E&W&W&W!W!W $!3 2                  ' 	0 	0C  0[%<"))+h*?@@@A"))$////"#566(//0CDDD'../ABBBB %,,...#++---0 0 0f CY''    400qMqM&q)&q).q1.q1'*,Q/+A.

 

 dEHO,, 	 	 	 	 0/dmWdl[[DLJJtJJw 1v::s8   ,CMMM=OO	O	3AUU	U	)rK   FN)r>   rG   r4   _utilsr   hasattr_C__dict__torch._Cr   r   r   r   r   r   r1   r   r
   r   r   <module>r      s   				               wux*++ &1k,&?&?EHl#.9k:N.O.OEH*+:E+(; ;EH67         , , ,     D. D. D. D. D.# D. D. D.NFA FA FA FA FA FA FA FAV PTl l l l l lr   