
    ΧgJ                        d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZmZmZ g dZ ed          Z edd	          Zeeef         Zeed
f         Z edee          Z G d dee                   Z G d dee         ee                   Z G d deeed
f                            Z G d dee                   Z  G d dee                   Z! G d de          Z" G d dee                   Z#efdee         deee$e%f                  de
e         de	e#e                  fdZ&dS )    N)
castDictGenericIterableListOptionalSequenceTupleTypeVarUnion)
deprecated)default_generator	GeneratorrandpermTensor)DatasetIterableDatasetTensorDatasetStackDatasetConcatDatasetChainDatasetSubsetrandom_split_T_T_coT)	covariant._T_stackc                   &    e Zd ZdZdefdZd	dZdS )
r   a  An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`. Subclasses could also
    optionally implement :meth:`__getitems__`, for speedup batched samples
    loading. This method accepts list of indices of samples of batch and returns
    list of samples.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs an index
      sampler that yields integral indices.  To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    returnc                      t          d          )Nz3Subclasses of Dataset should implement __getitem__.)NotImplementedErrorselfindexs     T/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/utils/data/dataset.py__getitem__zDataset.__getitem__>   s    !"WXXX    otherDataset[_T_co]ConcatDataset[_T_co]c                 $    t          | |g          S N)r   r#   r(   s     r%   __add__zDataset.__add__E   s    dE]+++r'   N)r(   r)   r   r*   )__name__
__module____qualname____doc__r   r&   r.    r'   r%   r   r   +   sR         $YE Y Y Y Y, , , , , ,r'   r   c                   *    e Zd ZdZdee         fdZdS )r   aH  An iterable Dataset.

    All datasets that represent an iterable of data samples should subclass it.
    Such form of datasets is particularly useful when data come from a stream.

    All subclasses should overwrite :meth:`__iter__`, which would return an
    iterator of samples in this dataset.

    When a subclass is used with :class:`~torch.utils.data.DataLoader`, each
    item in the dataset will be yielded from the :class:`~torch.utils.data.DataLoader`
    iterator. When :attr:`num_workers > 0`, each worker process will have a
    different copy of the dataset object, so it is often desired to configure
    each copy independently to avoid having duplicate data returned from the
    workers. :func:`~torch.utils.data.get_worker_info`, when called in a worker
    process, returns information about the worker. It can be used in either the
    dataset's :meth:`__iter__` method or the :class:`~torch.utils.data.DataLoader` 's
    :attr:`worker_init_fn` option to modify each copy's behavior.

    Example 1: splitting workload across all workers in :meth:`__iter__`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> # xdoctest: +SKIP("Fails on MacOS12")
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         worker_info = torch.utils.data.get_worker_info()
        ...         if worker_info is None:  # single-process data loading, return the full iterator
        ...             iter_start = self.start
        ...             iter_end = self.end
        ...         else:  # in a worker process
        ...             # split workload
        ...             per_worker = int(math.ceil((self.end - self.start) / float(worker_info.num_workers)))
        ...             worker_id = worker_info.id
        ...             iter_start = self.start + worker_id * per_worker
        ...             iter_end = min(iter_start + per_worker, self.end)
        ...         return iter(range(iter_start, iter_end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [tensor([3]), tensor([4]), tensor([5]), tensor([6])]

        >>> # xdoctest: +REQUIRES(POSIX)
        >>> # Mult-process loading with two worker processes
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

        >>> # With even more workers
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

    Example 2: splitting workload across all workers using :attr:`worker_init_fn`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         return iter(range(self.start, self.end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [3, 4, 5, 6]
        >>>
        >>> # Directly doing multi-process loading yields duplicate data
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [3, 3, 4, 4, 5, 5, 6, 6]

        >>> # Define a `worker_init_fn` that configures each dataset copy differently
        >>> def worker_init_fn(worker_id):
        ...     worker_info = torch.utils.data.get_worker_info()
        ...     dataset = worker_info.dataset  # the dataset copy in this worker process
        ...     overall_start = dataset.start
        ...     overall_end = dataset.end
        ...     # configure the dataset to only process the split workload
        ...     per_worker = int(math.ceil((overall_end - overall_start) / float(worker_info.num_workers)))
        ...     worker_id = worker_info.id
        ...     dataset.start = overall_start + worker_id * per_worker
        ...     dataset.end = min(dataset.start + per_worker, overall_end)
        ...

        >>> # Mult-process loading with the custom `worker_init_fn`
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
        [3, 5, 4, 6]

        >>> # With even more workers
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn)))
        [3, 4, 5, 6]
    r(   c                 $    t          | |g          S r,   )r   r-   s     r%   r.   zIterableDataset.__add__   s    T5M***r'   N)r/   r0   r1   r2   r   r   r.   r3   r'   r%   r   r   M   s@        j jX+WU^ + + + + + +r'   r   c                   J    e Zd ZU dZeedf         ed<   deddfdZd Zd Z	dS )	r   zDataset wrapping tensors.

    Each sample will be retrieved by indexing tensors along the first dimension.

    Args:
        *tensors (Tensor): tensors that have the same size of the first dimension.
    .tensorsr   Nc                 `    t          fdD                       s
J d            | _        d S )Nc              3   |   K   | ]6}d                               d           |                     d           k    V  7dS )r   N)size).0tensorr7   s     r%   	<genexpr>z)TensorDataset.__init__.<locals>.<genexpr>   sS       
 
5;GAJOOA&++a..0
 
 
 
 
 
r'   zSize mismatch between tensors)allr7   )r#   r7   s    `r%   __init__zTensorDataset.__init__   s_     
 
 
 
?F
 
 
 
 
 	+ 	+*	+ 	+ 	+ r'   c                 D    t          fd| j        D                       S )Nc              3   (   K   | ]}|         V  d S r,   r3   )r;   r<   r$   s     r%   r=   z,TensorDataset.__getitem__.<locals>.<genexpr>   s'      >>vVE]>>>>>>r'   )tupler7   r"   s    `r%   r&   zTensorDataset.__getitem__   s(    >>>>>>>>>>r'   c                 B    | j         d                             d          S Nr   )r7   r:   r#   s    r%   __len__zTensorDataset.__len__   s    |A##A&&&r'   )
r/   r0   r1   r2   r
   r   __annotations__r?   r&   rF   r3   r'   r%   r   r      sw           63; D    ? ? ?' ' ' ' 'r'   r   c                   r    e Zd ZU dZeeef         ed<   dee	         dee	         ddfdZ
d Zd	efd
Zd ZdS )r   a  Dataset as a stacking of multiple datasets.

    This class is useful to assemble different parts of complex input data, given as datasets.

    Example:
        >>> # xdoctest: +SKIP
        >>> images = ImageDataset()
        >>> texts = TextDataset()
        >>> tuple_stack = StackDataset(images, texts)
        >>> tuple_stack[0] == (images[0], texts[0])
        >>> dict_stack = StackDataset(image=images, text=texts)
        >>> dict_stack[0] == {'image': images[0], 'text': texts[0]}

    Args:
        *args (Dataset): Datasets for stacking returned as tuple.
        **kwargs (Dataset): Datasets for stacking returned as dict.
    datasetsargskwargsr   Nc                     |r^|rt          d          t          |d                    _        t           fd|D                       rt          d          | _        d S |rnt          |                                          }t          |d                    _        t           fd|D                       rt          d          | _        d S t          d          )NztSupported either ``tuple``- (via ``args``) or``dict``- (via ``kwargs``) like input/output, but both types are given.r   c              3   H   K   | ]}j         t          |          k    V  d S r,   _lengthlenr;   datasetr#   s     r%   r=   z(StackDataset.__init__.<locals>.<genexpr>   s0      DDG4<3w<</DDDDDDr'   zSize mismatch between datasetsc              3   H   K   | ]}j         t          |          k    V  d S r,   rN   rQ   s     r%   r=   z(StackDataset.__init__.<locals>.<genexpr>   s0      CCG4<3w<</CCCCCCr'   z%At least one dataset should be passed)
ValueErrorrP   rO   anyrI   listvalues)r#   rJ   rK   tmps   `   r%   r?   zStackDataset.__init__   s     	F  ^   tAw<<DLDDDDtDDDDD C !ABBB DMMM 	Fv}}''Cs1v;;DLCCCCsCCCCC C !ABBB"DMMMDEEEr'   c                     t          | j        t                    r%fd| j                                        D             S t	          fd| j        D                       S )Nc                 (    i | ]\  }}||         S r3   r3   )r;   krR   r$   s      r%   
<dictcomp>z,StackDataset.__getitem__.<locals>.<dictcomp>  s#    NNN*!WAwu~NNNr'   c              3   (   K   | ]}|         V  d S r,   r3   )r;   rR   r$   s     r%   r=   z+StackDataset.__getitem__.<locals>.<genexpr>  s'      AAWU^AAAAAAr'   )
isinstancerI   dictitemsrB   r"   s    `r%   r&   zStackDataset.__getitem__  se    dmT** 	ONNNN8K8K8M8MNNNNAAAA4=AAAAAAr'   indicesc           	         t          | j        t                    rd |D             }| j                                        D ]\  }}t	          t          |dd                     r|                    |          }t          |          t          |          k    r/t          dt          |           dt          |                     t          ||          D ]
\  }}|||<   t          ||          D ]\  }}||         ||<   |S d |D             }	| j        D ]}t	          t          |dd                     r|                    |          }t          |          t          |          k    r/t          dt          |           dt          |                     t          ||	          D ]\  }}
|

                    |           t          ||	          D ] \  }}
|

                    ||                    !d |	D             }|S )Nc                     g | ]}i S r3   r3   r;   _s     r%   
<listcomp>z-StackDataset.__getitems__.<locals>.<listcomp>
  s    (=(=(=(=(=(=r'   __getitems__z0Nested dataset's output size mismatch. Expected z, got c                     g | ]}g S r3   r3   rd   s     r%   rf   z-StackDataset.__getitems__.<locals>.<listcomp>  s    !6!6!6"!6!6!6r'   c                 ,    g | ]}t          |          S r3   )rB   )r;   samples     r%   rf   z-StackDataset.__getitems__.<locals>.<listcomp>)  s    &N&N&NuV}}&N&N&Nr'   )r^   rI   r_   r`   callablegetattrrg   rP   rT   zipappend)r#   ra   
dict_batchr[   rR   r`   datad_sampleidx
list_batcht_sampletuple_batchs               r%   rg   zStackDataset.__getitems__  sw   dmT** 	(=(=W(=(=(=J"m1133 3 3
7GG^TBBCC 3#0099E5zzS\\11(J),WJ J=@ZZJ J   +.eZ*@*@ + +h&*+ *-Wj)A)A 3 3X&-cl3 "7!6g!6!6!6
} 	2 	2G>>?? 2,,W55u::W--$F%(\\F F9<UF F   '*%&<&< * *ND(OOD))))* &)*%=%= 2 2MCOOGCL11112&N&N:&N&N&Nr'   c                     | j         S r,   )rO   rE   s    r%   rF   zStackDataset.__len__,  s
    |r'   )r/   r0   r1   r2   r   rB   r_   rG   r   r   r?   r&   rV   rg   rF   r3   r'   r%   r   r      s          $ E4K    Fgen F F4 F F F F(B B B
#D # # # #J    r'   r   c                        e Zd ZU dZeee                  ed<   ee         ed<   e	d             Z
dee         ddf fdZd Zd	 Ze ed
e          d                         Z xZS )r   zDataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets.

    Args:
        datasets (sequence): List of datasets to be concatenated
    rI   cumulative_sizesc                 p    g d}}| D ].}t          |          }|                    ||z              ||z  }/|S rD   )rP   rn   )sequencersels        r%   cumsumzConcatDataset.cumsum<  sH    11 	 	AAAHHQUOOOFAAr'   r   Nc                 D   t                                                       t          |          | _        t	          | j                  dk    s
J d            | j        D ]!}t          |t                    r
J d            "|                     | j                  | _        d S )Nr   z(datasets should not be an empty iterablez.ConcatDataset does not support IterableDataset)	superr?   rV   rI   rP   r^   r   r   rx   )r#   rI   d	__class__s      r%   r?   zConcatDataset.__init__E  s    X4=!!A%%%'Q%%% 	@ 	@A!?  @ @?@ @ @ @ !%DM : :r'   c                     | j         d         S )Nrx   rE   s    r%   rF   zConcatDataset.__len__O  s    $R((r'   c                 
   |dk     r5| t          |           k    rt          d          t          |           |z   }t          j        | j        |          }|dk    r|}n|| j        |dz
           z
  }| j        |         |         S )Nr   z8absolute value of index should not exceed dataset length   )rP   rT   bisectbisect_rightrx   rI   )r#   rr   dataset_idx
sample_idxs       r%   r&   zConcatDataset.__getitem__R  s    77tc$ii N   d))c/C)$*?EE!JJt4[1_EEJ}[)*55r'   z>`cummulative_sizes` attribute is renamed to `cumulative_sizes`)categoryc                     | j         S r,   r   rE   s    r%   cummulative_sizeszConcatDataset.cummulative_sizes`  s     $$r'   )r/   r0   r1   r2   r   r   r   rG   intstaticmethodr   r   r?   rF   r&   propertyr   FutureWarningr   __classcell__r   s   @r%   r   r   0  s           75>""""3i  \;'!2 ;t ; ; ; ; ; ;) ) )6 6 6 ZH  % %	  X
% % % % %r'   r   c                   D     e Zd ZdZdee         ddf fdZd Zd Z xZ	S )r   a_  Dataset for chaining multiple :class:`IterableDataset` s.

    This class is useful to assemble different existing dataset streams. The
    chaining operation is done on-the-fly, so concatenating large-scale
    datasets with this class will be efficient.

    Args:
        datasets (iterable of IterableDataset): datasets to be chained together
    rI   r   Nc                 V    t                                                       || _        d S r,   )r   r?   rI   )r#   rI   r   s     r%   r?   zChainDataset.__init__t  s$     r'   c              #   l   K   | j         D ])}t          |t                    s
J d            |E d {V  *d S )N*ChainDataset only supports IterableDataset)rI   r^   r   )r#   r   s     r%   __iter__zChainDataset.__iter__x  sd       	 	A?  < <;< < < LLLLLLLL		 	r'   c                     d}| j         D ]3}t          |t                    s
J d            |t          |          z  }4|S )Nr   r   )rI   r^   r   rP   )r#   totalr   s      r%   rF   zChainDataset.__len__  s^     	 	A?  < <;< < < SVVOEEr'   )
r/   r0   r1   r2   r   r   r?   r   rF   r   r   s   @r%   r   r   i  sx         !'!2 !t ! ! ! ! ! !        r'   r   c                       e Zd ZU dZee         ed<   ee         ed<   dee         dee         ddfdZ	d Z
dee         dee         fdZd	 ZdS )
r   z
    Subset of a dataset at specified indices.

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    rR   ra   r   Nc                 "    || _         || _        d S r,   rR   ra   )r#   rR   ra   s      r%   r?   zSubset.__init__  s    r'   c                      t          |t                    r j         fd|D                      S  j         j        |                  S )Nc                 *    g | ]}j         |         S r3   ra   )r;   ir#   s     r%   rf   z&Subset.__getitem__.<locals>.<listcomp>  s     > > >Qa > > >r'   )r^   rV   rR   ra   )r#   rr   s   ` r%   r&   zSubset.__getitem__  sL    c4   	@< > > > ># > > >??|DL-..r'   c                      t          t           j        dd                     r& j                             fd|D                       S  fd|D             S )Nrg   c                 *    g | ]}j         |         S r3   r   r;   rr   r#   s     r%   rf   z'Subset.__getitems__.<locals>.<listcomp>  s     -S-S-SCdl3.?-S-S-Sr'   c                 @    g | ]}j         j        |                  S r3   r   r   s     r%   rf   z'Subset.__getitems__.<locals>.<listcomp>  s'    GGGDLc!23GGGr'   )rk   rl   rR   rg   )r#   ra   s   ` r%   rg   zSubset.__getitems__  si     GDL.$??@@ 	H<,,-S-S-S-S7-S-S-STTTGGGGwGGGGr'   c                 *    t          | j                  S r,   )rP   ra   rE   s    r%   rF   zSubset.__len__  s    4<   r'   )r/   r0   r1   r2   r   r   rG   r	   r   r?   r&   r   rg   rF   r3   r'   r%   r   r     s           U^c] # 4    / / /
HDI H$u+ H H H H! ! ! ! !r'   r   rR   lengths	generatorr   c                    
 t          j        t          |          d          rt          |          dk    rg }t          |          D ]j\  }}|dk     s|dk    rt	          d| d          t          t          j        t                     |z                      }|                    |           kt                     t          |          z
  }t          |          D ]$}|t          |          z  }||xx         dz  cc<   %|}t          |          D ]#\  }}	|	dk    rt          j        d| d           $t          |          t                     k    rt	          d          t          t          |          |                                          
t          t          t
                   |          } 
fd	t!          t#          j        |          |          D             S )
a  
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    If a list of fractions that sum up to 1 is given,
    the lengths will be computed automatically as
    floor(frac * len(dataset)) for each fraction provided.

    After computing the lengths, if there are any remainders, 1 count will be
    distributed in round-robin fashion to the lengths
    until there are no remainders left.

    Optionally fix the generator for reproducible results, e.g.:

    Example:
        >>> # xdoctest: +SKIP
        >>> generator1 = torch.Generator().manual_seed(42)
        >>> generator2 = torch.Generator().manual_seed(42)
        >>> random_split(range(10), [3, 7], generator=generator1)
        >>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)

    Args:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths or fractions of splits to be produced
        generator (Generator): Generator used for the random permutation.
    r   r   zFraction at index z is not between 0 and 1zLength of split at index z- is 0. This might result in an empty dataset.zDSum of input lengths does not equal the length of the input dataset!)r   c                 L    g | ] \  }}t          ||z
  |                   !S r3   )r   )r;   offsetlengthrR   ra   s      r%   rf   z random_split.<locals>.<listcomp>  sE       FF 	w& 89::  r'   )mathisclosesum	enumeraterT   r   floorrP   rn   rangewarningswarnr   tolistr   r	   rm   	itertools
accumulate)rR   r   r   subset_lengthsr   fracn_items_in_split	remainderidx_to_add_atr   ra   s   `         @r%   r   r     s   < |CLL!$$ W):):$& )) 	4 	4GAtaxx4!88 !Pa!P!P!PQQQ"
3w<<$.//    !!"23333LL3~#6#66	y!! 	/ 	/AN 3 33M=)))Q.)))) "7++ 	 	IAv{{> > > >   7||s7||##R
 
 	
 s7||y999@@BBG8C='**G    !)"6w"?"?II   r'   )'r   r   r   r   typingr   r   r   r   r   r   r	   r
   r   r   typing_extensionsr   torchr   r   r   r   __all__r   r   str_T_dict_T_tupler   r   r   r   r   r   r   r   r   floatr   r3   r'   r%   <module>r      s                                 ) ( ( ( ( ( A @ @ @ @ @ @ @ @ @ @ @	 	 	 WT]]4(((
sEz
7:x11, , , , ,gen , , ,Dn+ n+ n+ n+ n+genhuo n+ n+ n+h' ' ' ' 'GE&#+./ ' ' '0T T T T T78$ T T Tn6% 6% 6% 6% 6%GEN 6% 6% 6%r    ?   @! ! ! ! !WU^ ! ! !H &7? ?R[?eCJ'(? 	"? 
&*	? ? ? ? ? ?r'   