
    gy                     h   d Z ddlZddlZddlmZmZ ddlmZmZ ddl	m
Z
mZ ddlZddlmZmZ dd	lmZ dd
lmZmZ ddlmZmZmZmZmZ  e            r	ddlmc mZ  e            rddlm Z  d Z!d Z"d Z#d Z$d Z%e!dddZ&dHdZ'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/ G d de0          Z1d  Z2d! Z3e2d"             Z4d#e
fd$Z5d#e
fd%Z6dId&Z7dJd(Z8ej9        dej:        dej;        d)ej<        d*ej=        d+ej>        d,ej?        d-ej@        d.ejA        d/ejB        d0i
ZCd1 eCD                                D             ZEd2 ZFdKd3ejG        fd4ZHe2dId5eIfd6            ZJdId5eIfd7ZKdLd8ZLdId9ZM G d: d;eN          ZOe3dMd<            ZPdId=ZQe2dNd@            ZRdA ZS G dB dC          ZTdD ZUdE ZVedOdG            ZWdS )PzB
A set of basic tensor ops compatible with tpu, gpu, and multigpu
    N)contextmanagernullcontext)update_wrapperwraps)AnyMapping   )AcceleratorStatePartialState   )!TORCH_DISTRIBUTED_OPERATION_TYPES)DistributedTypeTensorInformation)is_npu_availableis_torch_distributed_availableis_torch_versionis_torch_xla_availableis_xpu_available)ReduceOpc                 6    t          | t          j                  S N)
isinstancetorchTensortensors    W/var/www/html/ai-engine/env/lib/python3.11/site-packages/accelerate/utils/operations.pyis_torch_tensorr   -   s    fel+++    c           
          t          | t          j        j        t          j        j        t          j        j        t          j        j        t          j        j        t          j        j        t          j        j	                  S r   )
r   r   xpuFloatTensor
ByteTensor	IntTensor
LongTensor
HalfTensorDoubleTensorBFloat16Tensorr   s    r   is_torch_xpu_tensorr)   1   sO    							 	 	 	r   c                 ,    t          | t                    S r   )r   r   tensor_infos    r   is_tensor_informationr-   >   s    k#4555r   c                 l    t          | t                    ot          | d          ot          | d          S )z
    Checks if `data` is a `namedtuple` or not. Can have false positives, but only if a user is trying to mimic a
    `namedtuple` perfectly.
    _asdict_fields)r   tuplehasattrdatas    r   is_namedtupler5   B   s3    
 dE""\wtY'?'?\GDR[D\D\\r   c                     t          |           r t          |           t          |           S  t          |           |          S )zO
    Cast a generator to the same type as obj (list, tuple, or namedtuple)
    )r5   typelist)obj	generators     r   
honor_typer;   J   sC    
 S $tCyy$y//**tCyy###r   F	test_typeerror_on_other_typec          	          t          |t          t          f          r t          | fd|D                       S t          |t                    r: t          |           fd|                                D                       S  |          r  |gR i S r0t          dt          |           d j         dj         d          |S )ad  
    Recursively apply a function on a data structure that is a nested list/tuple/dictionary of a given base type.

    Args:
        func (`callable`):
            The function to recursively apply.
        data (nested list/tuple/dictionary of `main_type`):
            The data on which to apply `func`
        *args:
            Positional arguments that will be passed to `func` when applied on the unpacked data.
        main_type (`type`, *optional*, defaults to `torch.Tensor`):
            The base type of the objects to which apply `func`.
        error_on_other_type (`bool`, *optional*, defaults to `False`):
            Whether to return an error or not if after unpacking `data`, we get on an object that is not of type
            `main_type`. If `False`, the function will leave objects of types different than `main_type` unchanged.
        **kwargs (additional keyword arguments, *optional*):
            Keyword arguments that will be passed to `func` when applied on the unpacked data.

    Returns:
        The same data structure as `data` with `func` applied to every object of type `main_type`.
    c              3   @   K   | ]}t          |gR d V  dS )r<   Nrecursively_apply).0oargsr>   funckwargsr=   s     r   	<genexpr>z$recursively_apply.<locals>.<genexpr>n   sm          "!" .7M` dj      r   c           	      @    i | ]\  }}|t          |gR d S )r<   rA   )rC   kvrE   r>   rF   rG   r=   s      r   
<dictcomp>z%recursively_apply.<locals>.<dictcomp>w   sf        Aq $!" .7M` dj   r   zUnsupported types (z) passed to `z?`. Only nested list/tuple/dicts of objects that are valid for `z` should be passed.)	r   r1   r8   r;   r   r7   items	TypeError__name__)rF   r4   r=   r>   rE   rG   s   ` ````r   rB   rB   U   sw   , $&& 
        	  
 
 	
 
D'	"	" 
tDzz        !JJLL	  
 
 	
 
4 
tD*4***6***	 
S$t** S S4= S S+4+=S S S
 
 	
 Kr   c                 r   t          |           st          | d          rdk    rddk    rd	 |                               S # t          $ r |                               cY S t          $ r^}t                      rt          t                    rd n+t                      rt          t                    rd n|Y d	}~nd	}~ww xY w	 |                               S # t          $ r |                               cY S w xY wt          | t          t          f          rt          | fd
| D                       S t          | t                    rUt          t                    rgng  t          |           fd|                                 D                       S | S )a  
    Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to a given device.
        device (`torch.device`):
            The device to send the data to.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    tonpuznpu:0r!   zxpu:0)non_blockingznpu:zxpu:Nc              3   >   K   | ]}t          |           V  dS )rS   	skip_keysNsend_to_device)rC   tdevicerS   rV   s     r   rH   z!send_to_device.<locals>.<genexpr>   s6      oocd^AvLT]^^^oooooor   c           	      J    i | ]\  }}||v r|nt          |            S )rU   rW   )rC   rJ   rY   rZ   rS   rV   s      r   rL   z"send_to_device.<locals>.<dictcomp>   sN       Aq Y11N1fS_kt,u,u,u  r   )r   r2   rQ   rN   AssertionErrorr   r   intr   r1   r8   r;   r   strr7   rM   )r   rZ   rS   rV   errors    ``` r   rX   rX      s6    v )'&$"7"7 )U??FU??F	99V,9??? 	% 	% 	%99V$$$$$ 
	 
	 
	  !! fc** -,F__F!## fc** -,F__F
		%99V,9??? 	% 	% 	%99V$$$$$	%	FUDM	*	* oooooohnooo
 
 	
 
FG	$	$ i%% 	"IIItF||     "LLNN  
 
 	
 s0   A C,	C5ACCC. .DDc                 (    d }t          ||           S )aK  
    Recursively gathers the information needed to rebuild a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with [`~utils.TensorInformation`] instead of tensors.
    c                 8    t          | j        | j                  S )N)shapedtype)r   rb   rc   r   s    r   _get_data_structurez/get_data_structure.<locals>._get_data_structure   s     v|6<HHHHr   rA   )r4   rd   s     r   get_data_structurere      s'    I I I 0$777r   c                 (    d }t          ||           S )a:  
    Recursively gathers the shape of a nested list/tuple/dictionary of tensors as a list.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with lists of tensor shapes instead of tensors.
    c                 *    t          | j                  S r   )r8   rb   r   s    r   
_get_shapezget_shape.<locals>._get_shape   s    FL!!!r   rA   )r4   rh   s     r   	get_shaperi      s#    " " " Z...r   c                 6    d }t          || t                    S )z
    Recursively initializes tensors from a nested list/tuple/dictionary of [`~utils.TensorInformation`].

    Returns:
        The same data structure as `data` with tensors instead of [`~utils.TensorInformation`].
    c                 8    t          j        | j        d| j        iS Nrc   )r   emptyrb   rc   r+   s    r   _initialize_tensorz.initialize_tensors.<locals>._initialize_tensor   s    {K-G[5FGGGr   r=   )rB   r-   )data_structurern   s     r   initialize_tensorsrq      s-    H H H /K`aaaar   c                 "   t          | t          t          t          f          r3t	          |           dk    r t          dt          |            d          t          | t          t          f          rt          | d                   S t          | t                    r.|                                 D ]}t          | |                   c S n:t          | t          j
                  s t          dt          |            d          | j        d         S )a  
    Recursively finds the batch size in a nested list/tuple/dictionary of lists of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

    Returns:
        `int`: The batch size.
    r   z&Cannot find the batch size from empty .z0Can only find the batch size of tensors but got )r   r1   r8   r   len
ValueErrorr7   find_batch_sizekeysr   r   rN   rb   )r4   rJ   s     r   rv   rv      s     $g.// QSYY!^^O$t**OOOPPP$&& ZtAw'''	D'	"	" Z 	, 	,A"47+++++	,el++ ZX4PT::XXXYYY:a=r   c                 T    	 t          |           S # t          t          f$ r Y nw xY wdS )a  
    Same as [`utils.operations.find_batch_size`] except will ignore if `ValueError` and `TypeErrors` are raised

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

    Returns:
        `int`: The batch size.
    N)rv   ru   rN   r3   s    r   ignorant_find_batch_sizery   
  s?    t$$$	"   4s    %%c                 (    d }t          ||           S )aS  
    Recursively finds tensors in a nested list/tuple/dictionary and converts them to a list of numbers.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to convert to regular numbers.

    Returns:
        The same data structure as `data` with lists of numbers instead of `torch.Tensor`.
    c                     |                                                                  } | j        t          j        k    r|                     t          j                  } |                                 S r   )detachcpurc   r   bfloat16rQ   float32tolistr   s    r   _convert_to_listz!listify.<locals>._convert_to_list&  sM    $$&&<5>)) YYu}--F}}r   rA   )r4   r   s     r   listifyr     s$       -t444r   c                 V    d }t          || d          }t          j                     |S )Nc                     | j         dk    r|                                 d          } |                                 s|                                 } t	          j        |           S )Nr   )ndimcloneis_contiguous
contiguousxm
all_gatherr   s    r   _tpu_gather_onez$_tpu_gather.<locals>._tpu_gather_one3  sX    ;!\\^^D)F ##%% 	)&&((F}V$$$r   Tr>   )rB   r   	mark_step)r   r   ress      r   _tpu_gatherr   2  s7    % % % OV
N
N
NCLNNNJr   c                     t                      t          dd          rt          j        j        nt          j        j        fd}t          || d          S )Nz>=z1.13c                 N     j         dk    r                                 d                                            s                                  j        zj        dk    rot          j        j                                         z   j	        j
                  } |             |j        dg                                 dd          R  S  fdt          j                  D             }t
          j                            |            t          j        |d          S )Nr   gloorc   rZ   r   c                 8    g | ]}t          j                  S  )r   
empty_like)rC   _r   s     r   
<listcomp>z8_gpu_gather.<locals>._gpu_gather_one.<locals>.<listcomp>`  s$    [[[1e.v66[[[r   dim)r   r   r   r   backendr   rm   num_processesnumelrc   rZ   viewsizerangedistributedr   cat)r   output_tensors	gather_opstates   ` r   _gpu_gather_onez$_gpu_gather.<locals>._gpu_gather_oneH  s(   ;!\\^^D)F ##%% 	)&&((F=$&)@)@
 #[#fllnn4l|  N
 Inf---&>&r>FKKMM!"",=>>>>
 \[[[eFY@Z@Z[[[N((@@@9^3333r   Tr   )r   r   r   r   all_gather_into_tensor_all_gather_baserB   )r   r   r   r   s     @@r   _gpu_gatherr   A  sl    NNEf%% 7%<		%6	4 4 4 4 4 48 _f$OOOOr   c                       e Zd ZdZdS )DistributedOperationExceptionz
    An exception class for distributed operations. Raised if the operation cannot be performed due to the shape of the
    tensors.
    N)rO   
__module____qualname____doc__r   r   r   r   r   g  s         
 	Dr   r   c                 <     t                      fd            }|S )zv
    Verifies that `tensor` is the same shape across all processes. Only ran if `PartialState().debug` is `True`.
    c                  
   t                      j        t          j        k    st                      j        s | i |S j         dj         }d|v r	|d         }n| d         }t                      j        j        t          |          j        k    rUt          d| d|j        j         dt                      j        j         dt                      j        j         d| d          t          |          }t          |g          }|d         o|                    |d                   t          |          k    }|sAd	                    d
 t!          |          D                       }t          d| d|            | i |S )Nrs   r   r   z%One or more of the tensors passed to z were not on the z+ while the `Accelerator` is configured for z. Please move it to the z before calling z
  - c                 $    g | ]\  }}d | d| S )zProcess z: r   )rC   irb   s      r   r   z5verify_operation.<locals>.wrapper.<locals>.<listcomp>  s.    2m2m2mxqRW3Ja3J3J53J3J2m2m2mr   znCannot apply desired operation due to shape mismatches. All shapes across devices must be valid.

Operation: `z`
Input shapes:
  - )r   distributed_typer   NOdebugr   rO   rZ   r7   find_devicer   ri   gather_objectcountrt   join	enumerate)	rE   rG   	operationr   shapesoutputare_sameprocess_shape_strfunctions	           r   wrapperz!verify_operation.<locals>.wrapperu  s   >>*o.@@@H\@8T,V,,,*@@X->@@	vH%FF!WF>> %V)<)<)AAA/b	 b bTZTaTf b b  T`  Tb  Tb  Ti  Tn b b)5)>)Cb bU^b b b   6""x((!9 ||F1I..#f++=H $,MM2m2m[dek[l[l2m2m2m$n$n!3\'0\ \HY\ \  
 x((((r   r   r   r   s   ` r   verify_operationr   p  s5    
 8__) ) ) ) _)4 Nr   c                 <     t                      fd            }|S )z
    Checks that `verify_operation` failed and if so reports a more helpful error chaining the existing
    `DistributedOperationException`.
    c                      	  | i |S # t           $ r*}j         dj         }t          d| d          |d }~ww xY w)Nrs   zError found while calling `z1`. Please see the earlier error for more details.)r   r   rO   )rE   rG   er   r   s       r   r   z"chained_operation.<locals>.wrapper  sx    	8T,V,,,, 	 	 	#.DD1BDDI/jijjj 	s    
?%:?r   r   s   ` r   chained_operationr     s5     8__    _ Nr   c                     t                      j        t          j        k    rt	          |           S t                      j        t
          v rt          |           S | S )a4  
    Recursively gather tensor in a nested list/tuple/dictionary of tensors from all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    )r   r   r   XLAr   r   r   r   s    r   gatherr     sM     ~~&/*===6"""		(,M	M	M6"""r   objectc                     d t          t                      j                  D             }t          j                            ||            d |D             S )Nc                     g | ]}d S r   r   )rC   r   s     r   r   z&_gpu_gather_object.<locals>.<listcomp>  s    HHHqdHHHr   c                     g | ]	}|D ]}|
S r   r   )rC   yxs      r   r   z&_gpu_gather_object.<locals>.<listcomp>  s%    111!q11!A1111r   )r   r   r   r   r   all_gather_object)r   output_objectss     r   _gpu_gather_objectr     sQ    HHE,..*F$G$GHHHN	''???11~1111r   c                     t                      j        t          j        k    rt	          d          t                      j        t
          v rt          |           S | S )a5  
    Recursively gather object in a nested list/tuple/dictionary of objects from all devices.

    Args:
        object (nested list/tuple/dictionary of picklable object):
            The data to gather.

    Returns:
        The same data structure as `object` with all the objects sent to every device.
    z&gather objects in TPU is not supported)r   r   r   r   NotImplementedErrorr   r   )r   s    r   r   r     sN     ~~&/*===!"JKKK		(,M	M	M!&)))r   c                 0    dd}t          || d|          S )Nr   c                 H    t           j                            | |           | S )Nsrc)r   r   	broadcast)r   r   s     r   _gpu_broadcast_onez*_gpu_broadcast.<locals>._gpu_broadcast_one  s#    ##F#444r   T)r>   r   r   rA   )r4   r   r   s      r   _gpu_broadcastr     s1        /4UXYYYYr   broadcast tensorc                 X   t          | t          t          f          r)t          | fdt	          |           D                       S t          | t
                    r6 t          |           fd|                                 D                       S t          j	        | fd          S )Nc              3   J   K   | ]\  }}t          | d |           V  dS )r   nameN_tpu_broadcast)rC   r   rY   r   s      r   rH   z!_tpu_broadcast.<locals>.<genexpr>  s?      "g"gTQPQ>!T--A--#H#H#H"g"g"g"g"g"gr   c           	      D    i | ]\  }}|t          | d |           S )r   r   r   )rC   rJ   rK   r   s      r   rL   z"_tpu_broadcast.<locals>.<dictcomp>  s6    aaa$!QQq$}}}} E E Eaaar   c                     |          S r   r   )r   r   s    r   <lambda>z _tpu_broadcast.<locals>.<lambda>  s    !C& r   )
r   r8   r1   r;   r   r   r7   rM   r   mesh_reduce)r   r   r   s    ``r   r   r     s    &4-(( c&"g"g"g"gU^_eUfUf"g"g"ghhh	FG	$	$ ctF||aaaaRXR^R^R`R`aaabbb>$(8(8(8(8999r                     	   
   c                     i | ]\  }}||	S r   r   )rC   rJ   rK   s      r   rL   rL     s    CCC1q!CCCr   c                    d}t                      }t          j        |t          j        |j                  }| Z| j        }t          | j                 }t          j        t          |          |gz   t                    |dt          |          dz   <   t          |d          }||                                         }t          |dd         d	                   }|dd         }||fS )
ze
    Grabs the shape of `tensor` only available on one process and returns a tensor of its shape
    i   r   Nrc   r   sum	reductionr   r   )r   r   rm   r]   rZ   rb   TENSOR_TYPE_TO_INTrc   r   r8   rt   reducenonzero)r   max_tensor_dimensionr   base_tensorrb   tensor_dtyperc   s          r   gather_tensor_shaper     s    
 !NNE+2%)ELYYYK
 )&,7(-T%[[L>5QY\(](](]$c%jj1n$%666Kk11334KBCC #$$Ecrc"Kr   returnc                     t                      }t          |           \  }}| 9t          j        |t          |                                       |j                  } t          | d          S )a  
    Copys a tensor that only exists on a single device and broadcasts it to other devices. Differs from `broadcast` as
    each worker doesn't need to know its shape when used (and tensor can be `None`)

    Args:
        tensor (`torch.tensor`):
            The tensor that should be sent to all devices. Must only have it be defined on a single device, the rest
            should be `None`.
    Nr   r   r   )r   r   r   zerosTENSOR_INT_TO_DTYPErQ   rZ   r   )r   r   rb   rc   s       r   copy_tensor_to_devicesr    s`     NNE&v..LE5~U*=e*DEEEHHVV&E****r   from_processc                     t                      j        t          j        k    rt	          | |d          S t                      j        t
          v rt          | |          S | S )a  
    Recursively broadcast tensor in a nested list/tuple/dictionary of tensors to all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data

    Returns:
        The same data structure as `tensor` with all tensors broadcasted to the proper device.
    zaccelerate.utils.broadcast)r   r   r   )r   r   r   r   r   r   r   )r   r  s     r   r   r     sZ     ~~&/*===f,=YZZZZ		(,M	M	Mf,7777r   c                 $   t                      j        t          j        k    r2t	          |           D ]!\  }}t          j        d|fd          | |<   "n;t                      j        t          v r!t          j	        
                    |            | S )a  
    Broadcast a list of picklable objects form one process to the others.

    Args:
        object_list (list of picklable objects):
            The list of objects to broadcast. This list will be modified inplace.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data.

    Returns:
        The same list containing the objects from process 0.
    z&accelerate.utils.broadcast_object_listc                     |          S r   r   )r   r  s    r   r   z'broadcast_object_list.<locals>.<lambda>C  s    efgset r   r   )r   r   r   r   r   r   r   r   r   r   broadcast_object_list)object_listr  r   r9   s    `  r   r	  r	  4  s     ~~&/*===,, 	v 	vFAs^,TVY[t[t[t[tuuKNN	v		(,M	M	M///NNNr   c                 *    d }t          || |          S )aN  
    Recursively takes a slice in a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to slice.
        tensor_slice (`slice`):
            The slice to take.

    Returns:
        The same data structure as `data` with all the tensors slices.
    c                     | |         S r   r   )r   tensor_slices     r   _slice_tensorz$slice_tensors.<locals>._slice_tensorW  s    l##r   rA   )r4   r  process_indexr   r  s        r   slice_tensorsr  I  s%    $ $ $ ]D,???r   c           
      B    t           d         t          t          f          rCt           d          fdt	          t           d                             D                       S t           d         t                    rC t           d                    fd d                                         D                       S t           d         t          j
                  s%t          dt           d                              t          j                   S )a  
    Recursively concatenate the tensors in a nested list/tuple/dictionary of lists of tensors with the same shape.

    Args:
        data (nested list/tuple/dictionary of lists of tensors `torch.Tensor`):
            The data to concatenate.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to concatenate.

    Returns:
        The same data structure as `data` with all the tensors concatenated.
    r   c              3   T   K   | ]!t          fd D                       V  "dS )c                      g | ]
}|         S r   r   )rC   dr   s     r   r   z)concatenate.<locals>.<genexpr>.<listcomp>k  s    0D0D0D!10D0D0Dr   r   Nconcatenate)rC   r   r4   r   s    @r   rH   zconcatenate.<locals>.<genexpr>k  sG      #l#lSTK0D0D0D0Dt0D0D0D#$N$N$N#l#l#l#l#l#lr   c                 N    i | ] t          fd D                       !S )c                      g | ]
}|         S r   r   )rC   r  rJ   s     r   r   z*concatenate.<locals>.<dictcomp>.<listcomp>m  s    -A-A-Aqad-A-A-Ar   r   r  )rC   rJ   r4   r   s    @r   rL   zconcatenate.<locals>.<dictcomp>m  s?    dddPQa-A-A-A-AD-A-A-As!K!K!Kdddr   z%Can only concatenate tensors but got r   )r   r1   r8   r;   r   rt   r   r7   rw   r   r   rN   r   )r4   r   s   ``r   r  r  ]  s    $q'E4=)) Q$q'#l#l#l#l#lX]^abfghbi^j^jXkXk#l#l#lmmm	DGW	%	% QtDG}}dddddUYZ[U\UaUaUcUcdddeeeQ.. QOT!WOOPPP9Ts####r   c                       e Zd ZdS )CannotPadNestedTensorWarningN)rO   r   r   r   r   r   r  r  s  s        Dr   r  c                 4    dd}t          || d|||          S )a3  
    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they
    can safely be gathered.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.
    r   Fc                   	
 t          | dd          rt          j        dt                     | S t	          | j                  k    r| S t          j        | j        | j                  d          }t          |          
                                }t          fd|D                       		| j                 k    r| S | j        
t          
          }	|<   |                     t          |                    |z   }|r8t          	
fdt          t	          |                    D                       }n6t          
fdt          t	          |                    D                       }| ||<   |S )N	is_nestedFzHCannot pad nested tensors without more information. Leaving unprocessed.)rZ   c              3   (   K   | ]}|         V  d S r   r   )rC   sr   s     r   rH   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>  s'      --!qv------r   c              3   t   K   | ]2}|k    rt                   z
            nt          d           V  3d S r   slice)rC   r   r   max_sizeold_sizes     r   rH   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>  s[        [\Q#XXh#.9995QU;;     r   c              3   n   K   | ]/}|k    rt          d                    nt          d          V  0dS r   Nr!  rC   r   r   r$  s     r   rH   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>  sE      ooUVqCxxE!Xc]333U4[[oooooor   )getattrwarningswarnr  rt   rb   r   r   rZ   r   r}   maxr8   	new_zerosr1   r   )r   r   	pad_index	pad_firstr   sizesnew_size
new_tensorindicesr#  r$  s    `       @@r   _pad_across_processesz3pad_across_processes.<locals>._pad_across_processes  s   6;.. 	MZ,   M#fl####M |FL???Et  ""----u-----v|C(((M<>> %%eHoo66B
 	p      `efijrfsfs`t`t    GG oooooZ_`cdl`m`mZnZnoooooG$
7r   T)r>   r   r-  r.  r   r   FrA   )r   r   r-  r.  r3  s        r   pad_across_processesr5  w  s>    "   > v4ST]ir   r   c                 4    dd}t          || d|||          S )z
    Takes a `tensor` of arbitrary size and pads it so that it can work given `num_processes` needed dimensions.

    New tensors are just the last input repeated.

    E.g.:
      Tensor: ([3,4,4]) Num processes: 4 Expected result shape: ([4,4,4])

    r   c                 x  
 ||z  }|||z  z
  }||z  dk    r||z
  }n|||z  z
  }|||z  cxk    rdk     rn n||z
  }| j         
t          
          }||z   |d<   |                     t          |                    }t          
fdt	          t          |                    D                       }	| ||	<   |S )Nr   r   c              3   n   K   | ]/}|k    rt          d                    nt          d          V  0dS r&  r!  r'  s     r   rH   z@pad_input_tensors.<locals>._pad_input_tensors.<locals>.<genexpr>  sE      kkQR188a#///tkkkkkkr   )rb   r8   r,  r1   r   rt   )r   
batch_sizer   r   	remainderlast_inputsto_padr0  r1  r2  r$  s      `      @r   _pad_input_tensorsz-pad_input_tensors.<locals>._pad_input_tensors  s    -/	 I$=>&!++"Z/FF"jM&ABF &,,,,1,,,,, 6)F<>> 6)%%eHoo66
kkkkkV[\_`h\i\iVjVjkkkkk$
7r   T)r>   r9  r   r   r   rA   )r   r9  r   r   r=  s        r   pad_input_tensorsr>    sA       &  #   r   mean      ?c                 2    dd}t          || d||          S )aX  
    Recursively reduce the tensors in a nested list/tuple/dictionary of lists of tensors across all processes by the
    mean of a given operation.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to reduce.
        reduction (`str`, *optional*, defaults to `"mean"`):
            A reduction method. Can be of "mean", "sum", or "none"
        scale (`float`, *optional*):
            A default scaling value to be applied after the reduce, only valied on XLA.

    Returns:
        The same data structure as `data` with all the tensors reduced.
    r?  r@  c                    t                      }|                                 }|j        t          j        k    r|S |j        t          j        k    rHt          j                     t          j        t          j	        |g|           t          j                     n=|j        j
        t          v r*t          j                            |t          j                   |dk    r
||j        z  }|S )Nr?  )r   r   r   r   r   r   r   r   
all_reduce
REDUCE_SUMvaluer   r   r   r   SUMr   )r   r   scaler   cloned_tensors        r   _reduce_across_processesz(reduce.<locals>._reduce_across_processes  s    !_%777  !_%888
 LNNNM"--%@@@LNNNN#)-NNN((EEEU00Mr   T)r>   r   rG  r?  r@  rA   )r   r   rG  rI  s       r   r   r     s;    $   &  &di_d   r   c                 2    d }d }t          || |          S )av  
    Recursively converts the elements nested list/tuple/dictionary of tensors in FP16/BF16 precision to FP32.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to convert from FP16/BF16 to FP32.

    Returns:
        The same data structure as `tensor` with all tensors that were in FP16/BF16 precision converted to FP32.
    c                 *    |                                  S r   )floatr   s    r   _convert_to_fp32z)convert_to_fp32.<locals>._convert_to_fp32
  s    ||~~r   c                 ~    t          |           st          | d          o| j        t          j        t          j        fv S rl   )r   r2   rc   r   float16r~   r   s    r   _is_fp16_bf16_tensorz-convert_to_fp32.<locals>._is_fp16_bf16_tensor  s@    ''C767+C+C 
MNZ
 J
 	
r   ro   rA   )r   rN  rQ  s      r   convert_to_fp32rR    s9      
 
 
 -vAUVVVVr   c                   $    e Zd ZdZd Zd Zd ZdS )ConvertOutputsToFp32ad  
    Decorator to apply to a function outputing tensors (like a model forward pass) that ensures the outputs in FP16
    precision will be convert back to FP32.

    Args:
        model_forward (`Callable`):
            The function which outputs we want to treat.

    Returns:
        The same function as `model_forward` but with converted outputs.
    c                 4    || _         t          | |           d S r   )model_forwardr   )selfrV  s     r   __init__zConvertOutputsToFp32.__init__#  s     *t]+++++r   c                 6    t           | j        |i |          S r   )rR  rV  )rW  rE   rG   s      r   __call__zConvertOutputsToFp32.__call__'  s#    1t14B6BBCCCr   c                 *    t          j        d          )NzCannot pickle a prepared model with automatic mixed precision, please unwrap the model with `Accelerator.unwrap_model(model)` before pickling it.)picklePicklingError)rW  s    r   __getstate__z!ConvertOutputsToFp32.__getstate__*  s    " `
 
 	
r   N)rO   r   r   r   rX  rZ  r^  r   r   r   rT  rT    sN        
 
, , ,D D D
 
 
 
 
r   rT  c                 >     t                       fd} |_        |S )Nc                       | i |S r   r   )rE   rG   rV  s     r   forwardz(convert_outputs_to_fp32.<locals>.forward3  s    }d-f---r   )rT  __wrapped__)rV  ra  s   ` r   convert_outputs_to_fp32rc  0  s8    (77M. . . . . (GNr   c                 >   t          | t                    r.|                                 D ]}t          |          }||c S dS t          | t          t
          f          r| D ]}t          |          }||c S dS t          | t          j                  r| j        S dS )z
    Finds the device on which a nested dict/list/tuple of tensors lies (assuming they are all on the same device).

    Args:
        (nested list/tuple/dictionary of `torch.Tensor`): The data we want to know the device of.
    N)	r   r   valuesr   r1   r8   r   r   rZ   )r4   r9   rZ   s      r   r   r   <  s     $   ;;== 	 	C %%F! "	 	 
D5$-	(	(  	 	C %%F! "	 	 
D%,	'	' { r   Tc              #   \  K   t                      j        t          j        k    s8t                      j        4t                      j                                        st                      }n"ddl}|j        	                    | |||          }|5  dV  ddd           dS # 1 swxY w Y   dS )z
    Wrapper around `deepspeed.runtime.zero.GatheredParameters`, but if Zero-3 is not enabled, will be a no-op context
    manager.
    Nr   )modifier_rank
fwd_moduleenabled)
r
   r   r   	DEEPSPEEDdeepspeed_pluginis_zero3_init_enabledr   	deepspeedzeroGatheredParameters)paramsrg  rh  ri  gather_param_contextrm  s         r   ro  ro  Q  s      *o.GGG+7 ""3IIKK 	8  +}}(~@@-JPW  A  
  
 
                   s   B!!B%(B%)FNr   )r   r   r   )NNr4  rJ  )NNT)Xr   r\  r)  
contextlibr   r   	functoolsr   r   typingr   r   r   r   r
   r   	constantsr   dataclassesr   r   importsr   r   r   r   r   torch_xla.core.xla_modelcore	xla_modelr   torch.distributedr   r   r)   r-   r5   r;   rB   rX   re   ri   rq   rv   ry   r   r   r   	Exceptionr   r   r   r   r   r   r   r   rM  doublehalfr~   uint8int8int16int32int64boolr   rM   r  r   r   r  r]   r   r	  r  r  UserWarningr  r5  r>  r   rR  rT  rc  r   ro  r   r   r   <module>r     s<      2 2 2 2 2 2 2 2 + + + + + + + +          2 2 2 2 2 2 2 2 8 8 8 8 8 8 ; ; ; ; ; ; ; ;               *)))))))))!!## +******, , ,
 
 
6 6 6] ] ]$ $ $ 4CX] 0 0 0 0 0f6 6 6 6r8 8 8$/ / /$b b b  .  "5 5 5.  #P #P #PL	 	 	 	 	I 	 	 	     F  &   &2s 2 2 2 2#    &Z Z Z Z: : : : 
K	L!	J	NA	K	J	K	K	K	J  DC(:(@(@(B(BCCC   2+ +5< + + + +"  C    * S    *@ @ @ @($ $ $ $,	 	 	 	 	; 	 	 	 1 1 1 1h% % % %P & & & &RW W W0
 
 
 
 
 
 
 
4	 	 	  *      r   