
    g[                        d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
Z
mZmZmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z!  ee"          Z#dd	ddd dd	d	d dddd
d	dZ$i Z%e%&                                D ]&\  Z'Z( ede'          re$)                    e(           ' G d de          Z* G d de
          Z+ G d de          Z, G d d          Z- G d d          Z. G d de.e-          Z/ e            rd dl0m1c m2Z3  G d de3j4                  Z5 G d de.e-          Z6d Z7	 	 	 	 	 	 	 	 	 	 	 	 d3ded eej8                 d!ee9         d"ee9         d#e:d$e:d%eeee;ef                           d&ee:         d'e:d(ee         d)e:d*e:d+e:d,efd-Z< G d. d/e
          Z= G d0 d1e.e-          Z>d4d2Z?dS )5    N)suppress)CallableListOptionalUnion)BatchSampler
DataLoaderIterableDatasetRandomSampler   )
get_logger)DistributedTypeGradientStatePartialStateis_torch_xla_available)RNGType	broadcastbroadcast_object_listconcatenatefind_batch_sizeget_data_structureinitialize_tensorsis_torch_version*is_torchdata_stateful_dataloader_availablesend_to_deviceslice_tensorssynchronize_rng_statesF   )
batch_sizeshufflesamplerbatch_samplernum_workers
collate_fn
pin_memory	drop_lasttimeoutworker_init_fnmultiprocessing_context	generatorprefetch_factorpersistent_workers>=c                   8     e Zd ZdZ fdZ fdZdefdZ xZS )SeedableRandomSamplera  
    Same as a random sampler, except that in `__iter__` a seed can be used.

    Needed specifically in distributed cases, when the random generator for each GPU needs to start from the same seed
    and be fully reproducable on multiple iterations.

    If a custom `generator` is passed, it will rely on its initial seed as well as the current iteration it is on
    (stored in `self.epoch`).
    c                      t                      j        |i | d| _        t          j                                        | _        d S )Nr   )super__init__epochtorchrandominitial_seed)selfargskwargs	__class__s      R/var/www/html/ai-engine/env/lib/python3.11/site-packages/accelerate/data_loader.pyr2   zSeedableRandomSampler.__init__O   sB    $)&)))
!L5577    c              #   `  K   | j         7t          j                    | _         | j                             | j                   | j        | j        z   }| j                             |           t                                                      E d {V  |                     | j        dz              d S )Nr   )	r*   r4   	Generatormanual_seedr6   r3   r1   __iter__	set_epoch)r7   seedr:   s     r;   r@   zSeedableRandomSampler.__iter__T   s      >!"_..DNN&&t'8999 zD--""4(((77##%%%%%%%%%tzA~&&&&&r<   r3   c                     || _         dS )z*Sets the current iteration of the sampler.N)r3   r7   r3   s     r;   rA   zSeedableRandomSampler.set_epoch`   s    


r<   )	__name__
__module____qualname____doc__r2   r@   intrA   __classcell__r:   s   @r;   r/   r/   D   sv         8 8 8 8 8

' 
' 
' 
' 
's        r<   r/   c                   f    e Zd ZdZ	 	 	 	 ddededed	ed
ef
dZed             Z	d Z
d Zd Zd ZdS )BatchSamplerSharda  
    Wraps a PyTorch `BatchSampler` to generate batches for one of the processes only. Instances of this class will
    always yield a number of batches that is a round multiple of `num_processes` and that all have the same size.
    Depending on the value of the `drop_last` attribute of the batch sampler passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        batch_sampler (`torch.utils.data.sampler.BatchSampler`):
            The batch sampler to split in several shards.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with a sampler of `[[0, 1, 2, 3], [4, 5, 6, 7]]`, this will result in:

            - the sampler on process 0 to yield `[0, 1, 2, 3]` and the sampler on process 1 to yield `[4, 5, 6, 7]` if
              this argument is set to `False`.
            - the sampler on process 0 to yield `[0, 1]` then `[4, 5]` and the sampler on process 1 to yield `[2, 3]`
              then `[6, 7]` if this argument is set to `True`.
        even_batches (`bool`, *optional*, defaults to `True`):
            Whether or not to loop back at the beginning of the sampler when the number of samples is not a round
            multiple of (original batch size / number of processes).

    <Tip warning={true}>

    `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set `even_batches`
    equal to `False`

    </Tip>r   r   FTr"   num_processesprocess_indexsplit_batcheseven_batchesc                 8   |r)|j         |z  dk    rt          d|j          d| d          || _        || _        || _        || _        || _        t          |dd           | _         t          |dd          | _        | j         | j        rt          d          d S d S )	Nr   zDTo use `BatchSamplerShard` in `split_batches` mode, the batch size (;) needs to be a round multiple of the number of processes ().r   r&   FzYou need to use `even_batches=False` when the batch sampler has no batch size. If you are not calling this method directly, set `accelerator.even_batches=False` instead.)	r   
ValueErrorr"   rN   rO   rP   rQ   getattrr&   )r7   r"   rN   rO   rP   rQ   s         r;   r2   zBatchSamplerShard.__init__   s      	]5EJJ^WdWo ^ ^LY^ ^ ^   +***(!-tDD UCC?"t'8"f   #"""r<   c                 *    t          | j                  S Nlenr"   r7   s    r;   total_lengthzBatchSamplerShard.total_length       4%&&&r<   c                 j   | j         rt          | j                  S t          | j                  | j        z  dk    rt          | j                  | j        z  S t          | j                  | j        z  }| j        r|S | j        r|dz   S | j        t          | j                  | j        z  k     r|dz   n|S Nr   r   )rP   rZ   r"   rN   r&   rQ   rO   r7   lengths     r;   __len__zBatchSamplerShard.__len__   s     	+t)***t!""T%771<<t)**d.@@@T'((D,>>> 	oM 	oA: "&!3c$:L6M6MPTPb6b!b!b6A::hnnr<   c                 `    | j         r|                                 n|                                 S rX   )rP   _iter_with_split_iter_with_no_splitr[   s    r;   r@   zBatchSamplerShard.__iter__   s-    *.*<\t$$&&&$BZBZB\B\\r<   c              #     K   g }| j         j        | j        z  }t          | j                   D ]D\  }}|dk    r|}t	          |          | j        k    r||| j        z  || j        dz   z           V  E| j        st	          |          dk    rt	          |          | j        k     r| j        s>t	          |          || j        z  k    r!||| j        z  || j        dz   z           V  d S d S t	          |          | j        k     r||z  }t	          |          | j        k     ||z   }||| j        z  || j        dz   z           V  d S d S d S d S r_   )r"   r   rN   	enumeraterZ   rO   r&   rQ   )r7   initial_databatch_lengthidxbatchs        r;   rd   z"BatchSamplerShard._iter_with_split   s     )48JJ#D$677 	i 	iJCaxx$5zzT_,,L4+==PTPbefPf@gghhhh ~ 		i#l"3"3a"7"7CJJ<X<X$ iu::t/A AAAt/A ALTXTfijTjDk kllllll BA ,''$/99 L0L ,''$/99,L4+==PTPbefPf@gghhhhhh		i 		i"7"7<X<Xr<   c              #     K   g }g }t          | j                  D ]l\  }}| j        s|| j        k     r||z  }|| j        z  | j        k    r|}|| j        z  | j        dz
  k    r%| j        t          |          | j        k    r|V  g }m| j        sCt          |          dk    r1| j        st          |          dk    r|V  d S d S t          |          | j        k    r|V  t          |          | j        | j        z  k     r%||z  }t          |          | j        | j        z  k     %t          |          | j        k    rg }|dz  }d}|| j        z  dk    st          |          dk    rn|| j        z   t          |          z
  }||||         z  }|| j        z  | j        k    r|V  |}g }|dz  }|| j        z  dk    Ut          |          dk    hd S d S d S d S )Nr   r   )rg   r"   r&   rN   rO   r   rZ   rQ   )r7   rh   batch_to_yieldrj   rk   cycle_index	end_indexs          r;   re   z%BatchSamplerShard._iter_with_no_split   s[     #D$677 	$ 	$JC> &cD,>&>&>% T''4+===!&T''4+=+AAA'3u::+H+H$$$$!# ~ 	#l"3"3a"7"7$ ~&&**(((((( +* ~&&$/99(((( ,''$*<t*NNN L0L ,''$*<t*NNN u::00E1HC  D..!33s5zzA~~ +do =E

 JI\+i*?@@ET//43EEE#"+KE1HC D..!33s5zzA~~~~)	 	"7"7( 8F~r<   N)r   r   FT)rE   rF   rG   rH   r   rI   boolr2   propertyr\   rb   r@   rd   re    r<   r;   rM   rM   e   s           J #! #  	
     4 ' ' X'o o o$] ] ]i i i,- - - - -r<   rM   c                   P    e Zd ZdZ	 	 	 	 	 ddedededed	ed
efdZd Zd Z	d Z
dS )IterableDatasetSharda  
    Wraps a PyTorch `IterableDataset` to generate samples for one of the processes only. Instances of this class will
    always yield a number of samples that is a round multiple of the actual batch size (depending of the value of
    `split_batches`, this is either `batch_size` or `batch_size x num_processes`). Depending on the value of the
    `drop_last` attribute of the batch sampler passed, it will either stop the iteration at the first batch that would
    be too small or loop with indices from the beginning.

    Args:
        dataset (`torch.utils.data.dataset.IterableDataset`):
            The batch sampler to split in several shards.
        batch_size (`int`, *optional*, defaults to 1):
            The size of the batches per shard (if `split_batches=False`) or the size of the batches (if
            `split_batches=True`).
        drop_last (`bool`, *optional*, defaults to `False`):
            Whether or not to drop the last incomplete batch or complete the last batches by using the samples from the
            beginning.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with an iterable dataset yielding of `[0, 1, 2, 3, 4, 5, 6, 7]`, this will result in:

            - the shard on process 0 to yield `[0, 1, 2, 3]` and the shard on process 1 to yield `[4, 5, 6, 7]` if this
              argument is set to `False`.
            - the shard on process 0 to yield `[0, 1, 4, 5]` and the sampler on process 1 to yield `[2, 3, 6, 7]` if
              this argument is set to `True`.
    r   Fr   datasetr   r&   rN   rO   rP   c                     |r%|dk    r||z  dk    rt          d| d| d          || _        || _        || _        || _        || _        || _        d S )Nr   r   zGTo use `IterableDatasetShard` in `split_batches` mode, the batch size (rS   rT   )rU   ru   r   r&   rN   rO   rP   )r7   ru   r   r&   rN   rO   rP   s          r;   r2   zIterableDatasetShard.__init__"  s      	Z!^^
]0Ja0O0O^Zd ^ ^LY^ ^ ^   $"***r<   c                 v    || _         t          | j        d          r| j                            |           d S d S NrA   )r3   hasattrru   rA   rD   s     r;   rA   zIterableDatasetShard.set_epoch7  sC    
4<-- 	*L""5)))))	* 	*r<   c                     | j         r,t          | j                  | j        | j        z  z  | j        z  S t          j        t          | j                  | j        | j        z  z            | j        z  S rX   )r&   rZ   ru   r   rN   mathceilr[   s    r;   rb   zIterableDatasetShard.__len__<  sg    > 	k%%$/D<N*NOSWSbbb9S..$/DDV2VWXX[_[jjjr<   c              #   >  K   t          | j        d          s]t          | j        d          rHt          | j        j        t          j                  r$| j        j                            | j                   | j        r| j	        n| j	        | j
        z  }| j        r| j	        | j
        z  n| j	        }t          | j        |z  | j        dz   |z            }d }g }| j        D ]Q}|                    |           t          |          |k    r'|D ]}||         V  ||                                }g }R| j        sct          |          dk    rR||                                }t          |          |k     r||z  }t          |          |k     |D ]}||         V  d S d S d S )NrA   r*   r   r   )ry   ru   
isinstancer*   r4   r>   r?   r3   rP   r   rN   rangerO   appendrZ   copyr&   )r7   real_batch_sizeprocess_batch_sizeprocess_slicefirst_batchcurrent_batchelementis           r;   r@   zIterableDatasetShard.__iter__C  s     k22	;k22	; 4<15?CC	;
 L"..tz:::-1-?k$//doX\XjFjHLHZodo1CCC`d`od03EEHZ]^H^btGtuu| 	# 	#G  )))=!!_44& + +A'*****&"/"4"4"6"6K " ~ 	'#m"4"4q"8"8"+0022m$$66, m$$66" ' '#A&&&&&	' 	'"8"8
' 'r<   N)r   Fr   r   F)rE   rF   rG   rH   r
   rI   rp   r2   rA   rb   r@   rr   r<   r;   rt   rt     s         F #+ + + + 	+
 + + + + + +** * *
k k k' ' ' ' 'r<   rt   c                   *    e Zd ZdZd Zd Zd Zd ZdS )DataLoaderStateMixina  
    Mixin class that adds a state to a `DataLoader` to keep track of the status inside the dataloader such as at the
    end of the iteration, the number of items in the dataset in the last batch relative to the batch size, and other
    useful information that might be needed.

    **Available attributes:**

        - **end_of_dataloader** (`bool`) -- Whether at the last iteration or batch
        - **remainder** (`int`) -- The number of items that are remaining in the last batch, relative to the total
          batch size

    <Tip warning={true}>

        Inheriters of this class should ensure that the class creates a `GradientState()` instance, stored in
        `self.gradient_state`.

    </Tip>

    c                 "    d| _         d| _        d S NFend_of_dataloader	remainder)clsr9   s     r;   __init_subclass__z&DataLoaderStateMixin.__init_subclass__y  s     %r<   c                 "    d| _         d| _        d S r   r   r[   s    r;   resetzDataLoaderStateMixin.reset}  s    !&r<   c                 6   |                                   t          t                    5  | j        s7t	          | j        dt          | j                            }|| j        z  | _        ddd           n# 1 swxY w Y   | j	        
                    |            dS )z6Prepares the gradient state for the current dataloadertotal_dataset_lengthN)r   r   	Exception
_drop_lastrV   ru   rZ   total_batch_sizer   gradient_state_add_dataloaderr`   s     r;   beginzDataLoaderStateMixin.begin  s    

i   	@ 	@? @ /Es4<GXGXYY!'$*?!?	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	++D11111s   ?A44A8;A8c                 :    | j                             |            dS )z9Cleans up the gradient state after exiting the dataloaderN)r   _remove_dataloaderr[   s    r;   endzDataLoaderStateMixin.end  s    ..t44444r<   N)rE   rF   rG   rH   r   r   r   r   rr   r<   r;   r   r   d  sZ         (    2 2 25 5 5 5 5r<   r   c                   T    e Zd ZdZddZd Zd Zd Zed             Z	d	 Z
d
 Zd ZdS )DataLoaderAdapterz
    A class which wraps around a PyTorch `DataLoader` (or variants of it) to be used with the `Accelerator`. For
    compatability reasons, this class inherits from the class it wraps around, so it can be used as a drop-in.
    FNc                 2   || _         t                      rddlm} |rt                      st	          d          |r ||fd|i|| _        nt          |fd|i|| _        t          | j        d          r | j                                        | _	        d S d S )Nr   )StatefulDataLoaderz`StatefulDataLoader is not available. Please install torchdata version 0.8.0 or higher to use it.r"   
state_dict)
use_stateful_dataloaderr   torchdata.stateful_dataloaderr   ImportErrorbase_dataloaderr	   ry   r   dl_state_dict)r7   ru   r   r"   r9   r   s         r;   r2   zDataLoaderAdapter.__init__  s    '>$577 	IHHHHHH" 	+U+W+W 	r   # 	^#5#5g#e#e]#e^d#e#eD  #-g#]#]]#]V\#]#]D 4'66 	C!%!5!@!@!B!BD	C 	Cr<   c                 T    |dk    rt                      t          | j        |          S )Nr   )AttributeErrorrV   r   )r7   names     r;   __getattr__zDataLoaderAdapter.__getattr__  s-    $$$ """t+T222r<   c                     | j         S rX   )r   r[   s    r;   r   zDataLoaderAdapter.state_dict  s    !!r<   c                 :    | j                             |           d S rX   )r   load_state_dict)r7   r   s     r;   r   z!DataLoaderAdapter.load_state_dict  s    ,,Z88888r<   c                     | j         j        S )a   
        In order to maintain backwards compatability with other code, we need to ensure `isinstance(obj, DataLoader)`
        returs true. This is because some downstream code assumes that the `DataLoader` is the base class of the
        object.
        )r   r:   r[   s    r;   r:   zDataLoaderAdapter.__class__  s     #--r<   c                 *    t          | j                  S rX   )rZ   r   r[   s    r;   rb   zDataLoaderAdapter.__len__  s    4'(((r<   c                    t                      j        t          j        k    rt                      j        dz
  }| j        d         dk    r| j        dxx         |z  cc<   | j        d         dk    r| j        dxx         |z  cc<   | j        d         Md| j        d         v r@| j        d         d         dk    r+| j        d         dxx         | j        |z  z  cc<   dS dS dS dS dS )a:  
        Adjusts the state dict for prefetching. Natively, this will adjust all of the iters yielded keys in
        `self.dl_state_dict` by a factor of `num_processes - 1`, however if a custom correction is needed, this can be
        overridden.

        This should modify `self.dl_state_dict` directly
        r   _sampler_iter_yieldedr   _num_yielded_index_sampler_stateNsamples_yielded)r   distributed_typer   NOrN   r   r   )r7   factors     r;   adjust_state_dict_for_prefetchz0DataLoaderAdapter.adjust_state_dict_for_prefetch  s"    >>*o.@@@!^^1A5F!"9:Q>>"#:;;;vE;;;!.1A55">222f<222!"89E%);<R)SSS*+ABCTUXYYY&'=>?PQQQUYUdgmUmmQQQQQ A@ FESSYYr<   c                     t          | j        d          rC| j                                        | _        |                                  | j        | j        d<   d S d S )Nr   _iterator_finished)ry   r   r   r   r   r   r[   s    r;   _update_state_dictz$DataLoaderAdapter._update_state_dict  sf     4'66 	N!%!5!@!@!B!BD//1117;7MD3444	N 	Nr<   )FN)rE   rF   rG   rH   r2   r   r   r   rq   r:   rb   r   r   rr   r<   r;   r   r     s         
C C C C"3 3 3" " "9 9 9 . . X.) ) )n n n.N N N N Nr<   r   c                        e Zd ZdZ	 	 	 	 	 	 	 ddedef fdZd Z fd	Zd
efdZ	e
d             Ze
d             Zd Zd Z xZS )DataLoaderSharda  
    Subclass of `DataLoaderAdapter` that will deal with device placement and current distributed setup.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            The dataset to use to build this dataloader.
        device (`torch.device`, *optional*):
            If passed, the device to put all batches on.
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: an optional `torch.Generator`
        synchronized_generator (`torch.Generator`, *optional*):
            A random number generator to keep synchronized across processes.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to have this class adapt `StatefulDataLoader` from `torchdata` instead of the regular `DataLoader`.
        **kwargs (additional keyword arguments, *optional*):
            All other keyword arguments to pass to the regular `DataLoader` initialization.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    Nr   Fr   _non_blockingc	                      t                      j        |fd|i|	 || _        || _        || _        || _        t                      | _        || _        || _	        d| _
        d S )Nr   r   )r1   r2   device	rng_typessynchronized_generatorskip_batchesr   r   r   r   	iteration)r7   ru   r   r   r   r   r   r   r   r9   r:   s             r;   r2   zDataLoaderShard.__init__  so     	\\:Q\U[\\\"&<#(+oo$*r<   c              #     K   | j         t          | j         | j                   |                                  |                     | j                   | j                                        }	 t          |          }n# t          $ r d V  Y nw xY wd}	 	 | j
        t          || j
        | j                  }|                                  t          |          }|| j        k    r|V  |dz  }|}n:# t          $ r- d| _        |                                  || j        k    r|V  Y nw xY w| xj        dz  c_        |                                  d S )Nr   Tnon_blockingr   )r   r   r   r   rA   r   r   r@   nextStopIterationr   r   r   r   r   r   r   )r7   dataloader_iterr   batch_index
next_batchs        r;   r@   zDataLoaderShard.__iter__  s     >%"4>43NOOO

t~&&&.7799	 11MM 	 	 	EEEEE	 	;*$2=$+\`\n$o$o$oM'')))!/22
$"333''''q  *    )-&'')))$"333''''	$ 	!




s%   ,A< <BBAC1 14D('D(c                 j    t                                                      }t          g|dd         R S )a  
        Define the `__reduce__` method to ensure a `DataLoaderShard` can be pickled and unpickled. This needs to be
        explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        r   N)r1   
__reduce__r   r7   r8   r:   s     r;   r   zDataLoaderShard.__reduce__@  s3     ww!!##+$qrr(+++r<   r3   c                 ,   | j         |k    r|| _         t          | j        d          r;t          | j        j        d          r!| j        j                            |           d S t          | j        d          r| j                            |           d S d S Nr!   rA   r   ry   r"   r!   rA   ru   rD   s     r;   rA   zDataLoaderShard.set_epochI  s    >U"""DN4%y11 	*gd>P>XZe6f6f 	*&0077777 T\;// 	*L""5)))))	* 	*r<   c                     t          | j        t                    r| j        n| j        }t	          |dd          r|j        n|j        t	          |dd          z  S )NrP   FrN   r   )r~   r!   r   r"   rV   r   )r7   r"   s     r;   r   z DataLoaderShard.total_batch_sizeT  s^    (24<(N(NfTXTf }ou==YM$$*W]OUV-W-WW	
r<   c                 l    t          | j        d          r| j        j        S t          | j                  S )Nr\   )ry   ru   r\   rZ   r[   s    r;   r   z$DataLoaderShard.total_dataset_length]  s1    4<00 	%<,,t|$$$r<   c                      t          |           S rX   get_samplerr[   s    r;   r   zDataLoaderShard.get_samplerd      4   r<   c                     t          | j        t                    }|r|| j        _        d S || j        _        t	          | j        d          r|| j        j        _        d S d S Nr"   r~   r!   r   r"   ry   r7   r!   sampler_is_batch_samplers      r;   set_samplerzDataLoaderShard.set_samplerg  n    #-dlL#I#I # 	C#*DL   )0D&t)?;; C;B"0888C Cr<   )NNNr   FFF)rE   rF   rG   rH   rp   r2   r@   r   rI   rA   rq   r   r   r   r   rJ   rK   s   @r;   r   r     s          J # % #        ,! ! !F, , , , ,	*s 	* 	* 	* 	* 
 
 X
 % % X%! ! !C C C C C C Cr<   r   c                        e Zd ZdZdedej        f fdZ fdZde	fdZ
ed             Zed	             Zed
             Zed             Z xZS )MpDeviceLoaderWrappera  
        Wrapper for the xpl.MpDeviceLoader class that knows the total batch size.

        XLA preloading threads will all call DataLoaderShard's __iter__(). Remove rng_types from DataLoaderShard to
        prevent it from using the XLA device in the preloading threads, and synchronize the RNG once from the main
        thread only.

        **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
        
dataloaderr   c                     t                                          ||           | j        j        | _        d | j        _        || _        d S rX   )r1   r2   _loaderr   
_rng_typesr   )r7   r   r   r:   s      r;   r2   zMpDeviceLoaderWrapper.__init__  s>    GGZ000"l4DO%)DL" DKKKr<   c                     | j         t          | j         | j        j                   t	                                                      S rX   )r   r   r   r   r1   r@   )r7   r:   s    r;   r@   zMpDeviceLoaderWrapper.__iter__  s8    *&t8[\\\77##%%%r<   r3   c                 h    t          | j        d          r| j                            |           d S d S rx   )ry   r   rA   rD   s     r;   rA   zMpDeviceLoaderWrapper.set_epoch  s<    t44 1))%000001 1r<   c                     | j         j        S rX   )r   r   r[   s    r;   r   z&MpDeviceLoaderWrapper.total_batch_size  s    <00r<   c                     | j         j        S rX   )r   r   r[   s    r;   r   z*MpDeviceLoaderWrapper.total_dataset_length  s    <44r<   c                     | j         j        S rX   )r   r"   r[   s    r;   r"   z#MpDeviceLoaderWrapper.batch_sampler  s    <--r<   c                     | j         S rX   )r   r[   s    r;   r   z MpDeviceLoaderWrapper.dataloader  s
    <r<   )rE   rF   rG   rH   r   r4   r   r2   r@   rI   rA   rq   r   r   r"   r   rJ   rK   s   @r;   r   r   t  s        	 	 	! 	! 	! 	! 	! 	! 	! 	!	& 	& 	& 	& 	&	13 	1 	1 	1 	1 
	1 	1 
	1 
	5 	5 
	5 
	. 	. 
	. 
	  	  
	  	  	  	  	 r<   r   c                        e Zd ZdZ	 	 	 	 	 	 ddededef fdZd	 Zd
 ZdefdZ	d Z
 fdZed             Zed             Zd Zd Z xZS )DataLoaderDispatcheraD  
    Subclass of `DataLoaderAdapter` that will iterate and preprocess on process 0 only, then dispatch on each process
    their part of the batch.

    Args:
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration). Another way to see this is that the observed batch size will be
            the same as the initial `dataloader` if this option is set to `True`, the batch size of the initial
            `dataloader` multiplied by `num_processes` otherwise. Setting this option to `True` requires that the batch
            size of the `dataloader` is a round multiple of `batch_size`.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning of an iteration.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to have this class adapt `StatefulDataLoader` from `torchdata` instead of the regular `DataLoader`.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    Fr   NrP   r   r   c                    d}	t          dd          rddlm}
 t          ||
          r|j        }	 t                      j        |fd|i| || _        |	r+t          j	        j
        j                            ||	           t                      | _        t                      | _        || _        || _        || _        |t(          n|| _        d| _        d S )NFr-   z1.11.0r   )ShufflerIterDataPiper   )r    )r   -torch.utils.data.datapipes.iter.combinatoricsr   r~   _shuffle_enabledr1   r2   rP   r4   utilsdatagraph_settingsapply_shuffle_settingsr   r   r   stater   r   r   r   slice_fnr   )r7   ru   rP   r   r   r   r   r   r9   r    r   r:   s              r;   r2   zDataLoaderDispatcher.__init__  s     D(++ 	3ZZZZZZ '#788 3!2\\:Q\U[\\\* 	]K+BB7T[B\\\+oo!^^
$*()1)9xr<   c                    d\  }}| j         j        dk    r	 | j        r$|                                  t	          |          }ng }t          | j         j                  D ]8}|                                  |                    t	          |                     9	 t          |d          }n"# t          $ r}t          d          |d }~ww xY wt          |          dg}n# t          $ r d dg}Y nw xY wd | j        g}t          |           |d         | _        | j        rg| j        s`| j        sY| j         j        dk    r6t          |          dk    r#t          |d          }t          |          dg}nd dg}t          |           ||fS )N)NNr   dimaG  You can't use batches of different size with `dispatch_batches=True` or when using an `IterableDataset`.either pass `dispatch_batches=False` and have each process fetch its own batch  or pass `split_batches=True`. By doing so, the main process will fetch a full batch and slice it into `num_processes` batches for each process.FTr   )r   rO   rP   r   r   r   rN   r   r   RuntimeErrorr   r   _stop_iterationr   r   rZ   )r7   iteratorbatchesrk   _e
batch_infos          r;   _fetch_batchesz#DataLoaderDispatcher._fetch_batches  s   #:#q((*% !++--- NNEE !G"4:#;<< 7 7//111tH~~6666! +G ; ; ;' ! ! !*V 
  !!! 177?

  * * *"D\


*  45Jj))))!} 	2% 2do 2:+q00S\\A5E5E'Q777E"4U";";U!CJJ"&J%j111j  s6   A?C B) (C )
C3CCC C.-C.c              #   V  K   |                                   |                     | j                   d }t          dd          r| j                                        }n)| j        j        dk    r| j                                        }d}d| _        d }| 	                    |          \  }}d}|s||}}| j        j        dk    rt          |d                   }t          || j        j        | j                  }t          |d          }| j        sG|E|                     |t#          d| j        j                  | j        j        | j        j                  }|t'          d| d	          t)          |          }	|	| j        j        z  }
| j        }|s)| 	                    |          \  }}| j        r
|d         d
}| j        s-|r+|	| j        j        z  dk    rt+          ||gd          }|
dz  }
t#          | j        j        |
z  | j        j        dz   |
z            }|                     ||| j        j        | j        j                  }|r"d
| _        |                                  |	| _        || j        k    r|V  |dz  }|| xj        dz  c_        |                                  d S )Nr-   z2.0.1r   Fr   )from_process)rO   rN   z"Batch does not contain any data (`zM`). At the end of all iterable data available before expected stop iteration.Tr   r   )r   rA   r   r   r   r@   r   rO   r   r  r   r   r   r   r   r   r   slicerN   rU   r   r   r   r   r   r   r   )r7   main_iteratorstop_iterationr   r   next_batch_infor   rk   r  observed_batch_sizer   
data_slices               r;   r@   zDataLoaderDispatcher.__iter__  s     

t~&&&D'** 	< !099;;MMZ%** 099;;M$&*&9&9-&H&H#
O  8	 *O:Ez'1,,*:a=99"5$**;$J\]]]Ee!444E? {':"mm!TZ566"&*":"&*":	 ,   }  N  N  N  N   #2%"8"8,
0HHJ!1N! * /3.A.A-.P.P+
O' *OA,>,F%)N?  ~  :MPTPZPh:hlm:m:m#UK$8a@@@a
tz7*DtzG_bcGcgqFqrrJMM"j6"j6	 "  E  5)-&'')))!4d///1Kq ! 8	r 	!




r<   r3   c                 ,   | j         |k    r|| _         t          | j        d          r;t          | j        j        d          r!| j        j                            |           d S t          | j        d          r| j                            |           d S d S r   r   rD   s     r;   rA   zDataLoaderDispatcher.set_epoch\  s    >U"""DN4%y11 	*gd>P>XZe6f6f 	*&0077777T\;// 	*L""5)))))	* 	*r<   c                     t          | j                  }| j        r|S | j        r|| j        j        z  S t          j        || j        j        z            S rX   )rZ   r   rP   r   r   rN   r{   r|   )r7   whole_lengths     r;   rb   zDataLoaderDispatcher.__len__e  sY    4/00 	F_ 	F4:#;;;9\DJ,DDEEEr<   c                 j    t                                                      }t          g|dd         R S )a  
        Define the `__reduce__` method to ensure a `DataLoaderDispatcher` can be pickled and unpickled. This needs to
        be explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        r   N)r1   r   r   r   s     r;   r   zDataLoaderDispatcher.__reduce__n  s3     ww!!##$0tABBx000r<   c                 Z    | j         r| j        j        n| j        j        | j        j        z  S rX   )rP   ru   r   rN   r[   s    r;   r   z%DataLoaderDispatcher.total_batch_sizew  s/     (,'9uDL##@WZ^ZfZt@t	
r<   c                 *    t          | j                  S rX   )rZ   ru   r[   s    r;   r   z)DataLoaderDispatcher.total_dataset_length}  s    4<   r<   c                      t          |           S rX   r   r[   s    r;   r   z DataLoaderDispatcher.get_sampler  r   r<   c                     t          | j        t                    }|r|| j        _        d S || j        _        t	          | j        d          r|| j        j        _        d S d S r   r   r   s      r;   r   z DataLoaderDispatcher.set_sampler  r   r<   )Fr   FFFN)rE   rF   rG   rH   rp   r2   r  r@   rI   rA   rb   r   rq   r   r   r   r   rJ   rK   s   @r;   r   r     sC        : $ % #         @-! -! -!^J J JX*s * * * *F F F1 1 1 1 1 
 
 X

 ! ! X!! ! !C C C C C C Cr<   r   c                     t          | j        t                    }|rt          | j        dd          }nt          | j        dd          }|S )a  
    Get the sampler associated to the dataloader

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
    Returns:
        `torch.utils.data.Sampler`: The sampler associated to the dataloader
    r!   N)r~   r!   r   rV   r"   )r   r   r!   s      r;   r   r     sO      ***<lKK E*,i>>*2ItDDNr<   Tr   r   rN   rO   rP   put_on_devicer   dispatch_batchesrQ   slice_fn_for_dispatchuse_seedable_samplerr   r   returnc                     ||sd}nt           j        t                    }|r|st          d          t	                      }||j        }||j        }|r j         j        }nGt           j	        d          r j	        j        }n%t          dt           j	                   d          |dk    r$||z  dk    rt          d	 j         d
| d           j        }t          |t                    s j	        nd}t           j        t                    }d}t                     }t          |t                    rD|
rBt          |j        |j        |j        t'          |dt)          j                                        }t           j        t                    rN|j        t.          j        k    r9t)          j                                        d          }| _        | j        _        |dk    s|j        t.          j        k    r|st          |t                    rBt'           j        dd           j        j        }t9          | j         j        |||          }n\|
s6t          |d          r&|j        t)          j                    |_        |j        }|r j        n j	        }t=          |||||          }g d||d|v r|                    d            fdt@          D             }|" j        |d<   |r|s
 j        |z  n j        |d<   |r/|!                    d           tE          |f|| j        ||	|d| no|r:tG          |f|r|j        t.          j        k    r|nd| j        | j        |||d| n3tG          |f|r|j        t.          j        k    r|nd||| j        ||d| t          |t                    r|
r $                    |           |j        t.          j        k    rtK           |          S  S )a  
    Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.

    Depending on the value of the `drop_last` attribute of the `dataloader` passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
        device (`torch.device`):
            The target device for the returned `DataLoader`.
        num_processes (`int`, *optional*):
            The number of processes running concurrently. Will default to the value given by [`~state.PartialState`].
        process_index (`int`, *optional*):
            The index of the current process. Will default to the value given by [`~state.PartialState`].
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration).

            Another way to see this is that the observed batch size will be the same as the initial `dataloader` if
            this option is set to `True`, the batch size of the initial `dataloader` multiplied by `num_processes`
            otherwise.

            Setting this option to `True` requires that the batch size of the `dataloader` is a round multiple of
            `batch_size`.
        put_on_device (`bool`, *optional*, defaults to `False`):
            Whether or not to put the batches on `device` (only works if the batches are nested list, tuples or
            dictionaries of tensors).
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
              dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.

        dispatch_batches (`bool`, *optional*):
            If set to `True`, the dataloader prepared is only iterated through on the main process and then the batches
            are split and broadcast to each process. Will default to `True` when the underlying dataset is an
            `IterableDataset`, `False` otherwise.
        even_batches (`bool`, *optional*, defaults to `True`):
            If set to `True`, in cases where the total batch size across all processes does not exactly divide the
            dataset, samples at the start of the dataset will be duplicated so the batch can be divided equally among
            all workers.
        slice_fn_for_dispatch (`Callable`, *optional*`):
            If passed, this function will be used to slice tensors across `num_processes`. Will default to
            [`~utils.slice_tensors`]. This argument is used only when `dispatch_batches` is set to `True` and will be
            ignored otherwise.
        use_seedable_sampler (`bool`, *optional*, defaults to `False`):
            Whether to use the [`~data_loader.SeedableRandomSampler`] instead of a `RandomSampler` for better
            reproducability. Comes at a cost of potentially different performances due to different shuffling
            algorithms but ensures results will be the *exact* same. Should be paired with `set_seed()` at every
            `self.set_epoch`
        non_blocking (`bool`, *optional*, defaults to `False`):
            If set to `True`, dataloader will utilize non-blocking host-to-device transfers. If the dataloader has
            `pin_memory` set to `True`, this will help to increase overlap between data transfer and computations.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            "If set to true, the dataloader prepared by the Accelerator will be backed by "
            "[torchdata.StatefulDataLoader](https://github.com/pytorch/data/tree/main/torchdata/stateful_dataloader).
            This requires `torchdata` version 0.8.0 or higher that supports StatefulDataLoader to be installed."


    Returns:
        `torch.utils.data.dataloader.DataLoader`: A new data loader that will yield the portion of the batches

    <Tip warning={true}>

    `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set `even_batches`
    equal to `False`

    </Tip>
    NFz<Using `dispatch_batches=True` requires `put_on_device=True`.r   a  In order to use `split_batches==True` you must have a `batch_size` attribute either in the passed `dataloader` or `dataloader.batch_sampler` objects, and it has to return a natural number. Your `dataloader.batch_size` is None and `dataloader.batch_sampler` (`z0`) does not have the `batch_size` attribute set.r   r   z?To use a `DataLoader` in `split_batches` mode, the batch size (rS   rT   r*   )data_sourcereplacementnum_samplesr*   *   )r   r&   rN   rO   rP   )rN   rO   rP   rQ   r   r    r!   r"   r&   c           	      R    i | ]#}|v|t          |t          |                   $S rr   rV   _PYTORCH_DATALOADER_KWARGS.0kr   ignore_kwargss     r;   
<dictcomp>z'prepare_data_loader.<locals>.<dictcomp>^  B       M!! 	
7:q"<Q"?@@!!!r<   r&   )rP   r"   r   r   r   r   )r   r!   r   r   r   r   r   r   )r   r"   r   r   r   r   r   )&r~   ru   r
   rU   r   rN   rO   r   ry   r"   typer!   r   r   r   r/   r  r  _num_samplesrV   r4   r>   r   r   XLAr?   r*   MEGATRON_LMrt   r&   rM   remover$  popr   r   r   r   )r   r   rN   rO   rP   r  r   r  rQ   r  r  r   r   r   batch_size_for_checknew_datasetnew_batch_samplerr   r   r!   r*   r"   r9   r(  s   `                      @r;   prepare_data_loaderr4    sh   t  	O$)**<oNN Y YWXXXNNE++   ,#-#8   z/>> '1'?'J$$ j j677j j j    !##(<}(LPQ(Q(Q^R\Rg ^ ^LY^ ^ ^  
 $K8B;P_8`8`j
00fj)**<lKK!*%%G'=)) 

.B 


 (++,g{EO4E4EFF	
 
 
 *$m44 19OSbSf9f9fO%%11"55	(
'0
$e48SSS]mSk?33 	z);==I)3);)E&.%0$.+++  KK ( ;GG[,I,I ;$,(-(9(9G%)0):&2JhJ..PZPhM 1+++)! ! !  M !7!?KS\D\D\%%%    +  F  (2{6CwL\wJ!]22blbw 	|  $


;)	
'+!+&*$;	
 	
 	
 	


 
" 
$
*fu/EI\/\/\66bf%!,!+&#9$;
 
 
 


 %

*fu/EI\/\/\66bf+#9!+&$;

 

 

 


 '011 (6J (w'''!444$Z888r<   c                   <    e Zd ZdZddZd Zed             Zd ZdS )	SkipBatchSamplerz
    A `torch.utils.data.BatchSampler` that skips the first `n` batches of another `torch.utils.data.BatchSampler`.
    Should not be used if the original dataloader is a `StatefulDataLoader`.
    r   c                 "    || _         || _        d S rX   )r"   r   )r7   r"   r   s      r;   r2   zSkipBatchSampler.__init__  s    *(r<   c              #   \   K   t          | j                  D ]\  }}|| j        k    r|V  d S rX   )rg   r"   r   )r7   indexsampless      r;   r@   zSkipBatchSampler.__iter__  sF      '(:;; 	 	NE7)))	 	r<   c                 *    t          | j                  S rX   rY   r[   s    r;   r\   zSkipBatchSampler.total_length  r]   r<   c                 :    t          | j                  | j        z
  S rX   )rZ   r"   r   r[   s    r;   rb   zSkipBatchSampler.__len__  s    4%&&):::r<   Nr   )	rE   rF   rG   rH   r2   r@   rq   r\   rb   rr   r<   r;   r6  r6    sk         
) ) ) )  
 ' ' X'; ; ; ; ;r<   r6  c                   :     e Zd ZdZd fd	Zd Zd Z fdZ xZS )	SkipDataLoadera  
    Subclass of a PyTorch `DataLoader` that will skip the first batches. Generally it's preferable to use
    `skip_first_batches`/`torchdata.StatefulDataLoader` instead of this class.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            The dataset to use to build this dataloader.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning.
        kwargs:
            All other keyword arguments to pass to the regular `DataLoader` initialization.
    r   Fc                 t     t                      j        |fd|i| || _        t                      | _        d S )Nr   )r1   r2   r   r   r   )r7   ru   r   r   r9   r:   s        r;   r2   zSkipDataLoader.__init__  sC    \\:Q\U[\\\(+oor<   c              #      K   |                                   t          | j                                                  D ](\  }}|| j        k    r|                                  |V  )|                                  d S rX   )r   rg   r   r@   r   r   r   )r7   r9  rk   s      r;   r@   zSkipDataLoader.__iter__  sx      

%d&:&C&C&E&EFF 	 	LE5)))'')))




r<   c                 :    t          | j                  | j        z
  S rX   )rZ   r   r   r[   s    r;   rb   zSkipDataLoader.__len__  s    4'((4+<<<r<   c                 j    t                                                      }t          g|dd         R S )a  
        Define the `__reduce__` method to ensure a `SkipDataLoader` can be pickled and unpickled. This needs to be
        explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        r   N)r1   r   r?  r   s     r;   r   zSkipDataLoader.__reduce__  s3     ww!!##*abb***r<   )r   F)	rE   rF   rG   rH   r2   r@   rb   r   rJ   rK   s   @r;   r?  r?    s~         . . . . . .
  = = =+ + + + + + + + +r<   r?  c                 &   	 t                      }|j        t          j        k    r j        } j          j        }d}t          |t                    rd}n;t           j	        t                    }|r j	        n j        }t          ||          }g d	 	fdt          D             }| j        |d<    j        |d<   t           t                     r#|||d<   t!          |f j        | j        d	| nvt           t&                    r@|||d<   n|r||d
<    j        |d<   n||d<   t'          |f j         j         j        d| n!|t-          |fd|i| nt/          |fd|i| |j        t          j        k    rt1           |            S )z
    Creates a `torch.utils.data.DataLoader` that will efficiently skip the first `num_batches`. Should not be used if
    the original dataloader is a `StatefulDataLoader`.
    FN)r   r!  c           	      R    i | ]#}|v|t          |t          |                   $S rr   r#  r%  s     r;   r)  z&skip_first_batches.<locals>.<dictcomp>  r*  r<   r&   r   r   )rP   r"   r   r!   r"   )r   r   r   )r   r   r   r-  r   r   ru   r~   r
   r!   r   r"   r6  r$  r&   r   r   rP   r   r   r   r   r?  r	   r   )
r   num_batchesr   r   ru   r   r3  r"   r9   r(  s
   `        @r;   skip_first_batchesrG    sD   
 NNE!444"*
 G$'?++ V #-j.@,#O#O .Fd
**JLd,]UUU  M    +  F  (2{)4|*233  X$%0F>")
$2+!,	
 

 
 


 
J	0	0 X$%0F>""% 	8 1F9#-#8F<  &7F?#$
$ *#-#D	
 

 
 


 $'TTkTVTTJJ#GWW;LWPVWWJ!444*:v>>
r<   )NNNFFNNTNFFFr=  )@r{   
contextlibr   typingr   r   r   r   r4   torch.utils.datar   r	   r
   r   loggingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rE   loggerr$  %_PYTORCH_DATALOADER_ADDITIONAL_KWARGSitemsvadditional_kwargsupdater/   rM   rt   r   r   r   %torch_xla.distributed.parallel_loaderdistributedparallel_loaderxplMpDeviceLoaderr   r   r   r   rI   rp   strr4  r6  r?  rG  rr   r<   r;   <module>rX     s2          2 2 2 2 2 2 2 2 2 2 2 2  U U U U U U U U U U U U       W W W W W W W W W W W W                             
H		 #  $ )+ %AGGII = =Aa   ="))*;<<<    M   BY Y Y Y Y Y Y Yx`' `' `' `' `'? `' `' `'F(5 (5 (5 (5 (5 (5 (5 (5VRN RN RN RN RN RN RN RNjJC JC JC JC JC')= JC JC JCZ  2 777777777/  /  /  /  /  2 /  /  / deC eC eC eC eC,.B eC eC eCP  ( &*#'#'59'+04!&$)t ttU\"t C=t C=	t
 t t U3<012t tnt t $H-t t t "t t t t tn; ; ; ; ;| ; ; ;.%+ %+ %+ %+ %+&(< %+ %+ %+PL L L L L Lr<   