
""" Dataset reader that wraps TFDS datasets

Wraps many (most?) TFDS image-classification datasets
from https://github.com/tensorflow/datasets
https://www.tensorflow.org/datasets/catalog/overview#image_classification

Hacked together by / Copyright 2020 Ross Wightman
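
This reader is normally constructed indirectly through timm's dataset factory using a
'tfds/' prefixed dataset name. Editor's illustration (not part of the original docs; assumes
`tensorflow-datasets` is installed and the dataset has already been downloaded/prepared):

    from timm.data import create_dataset
    ds = create_dataset('tfds/oxford_iiit_pet', root='/data/tfds', split='train')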
    N)Optional)ImageGPU    F)drop_remainderzWarning: This version of tfds doesn't have the latest even_splits impl. Please update or use tfds-nightly for better fine-grained split behaviour.TzMPlease install tensorflow_datasets package `pip install tensorflow-datasets`.)load_class_map)Reader)SharedCountTFDS_TP_SIZE   TFDS_SHUFFLE_SIZEi    TFDS_PREFETCH_SIZEi   INTEGER_ACCURATE   c                 F    t           j                            | ||          S )N)channels
dct_method)tfimagedecode_jpeg)serialized_imagefeaturer   r   s       Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/data/readers/reader_tfds.pydecode_exampler   0   s*    8         c                      fdt          dz             D              fdt                    D             S )Nc                 :    g | ]}t          |z  z            S  )round).0innum_sampless     r   
<listcomp>z&even_split_indices.<locals>.<listcomp>:   s*    CCC%K!+,,CCCr   r   c           	      D    g | ]} d |          d|dz             dS )[:r   ]r   )r!   r"   
partitionssplits     r   r%   z&even_split_indices.<locals>.<listcomp>;   s?    NNNu;;z!};;z!a%'8;;;NNNr   )range)r+   r#   r$   r*   s   ```@r   even_split_indicesr-   9   sO    CCCCCeAEllCCCJNNNNNU1XXNNNNr   c                 ^    d| j         vri S | j         d         fdj        D             }|S )Nlabelc                 <    i | ]}|                     |          S r   )str2int)r!   r#   class_labels     r   
<dictcomp>z$get_class_labels.<locals>.<dictcomp>B   s)    III!A{**1--IIIr   )featuresnames)infoclass_to_idxr2   s     @r   get_class_labelsr8   >   sC    dm##	-(KIIII{7HIIILr   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Zd Z	 ddee         fdZd Z	d Z
d Zd ZddZddZ xZS )


class ReaderTfds(Reader):
    """ Wrap Tensorflow Datasets for use in PyTorch

    There are several things to be aware of:
      * To prevent excessive samples being dropped per epoch w/ distributed training or multiplicity of
         dataloader workers, the train iterator wraps to avoid returning partial batches that trigger drop_last
         https://github.com/pytorch/pytorch/issues/33413
      * With PyTorch IterableDatasets, each worker in each replica operates in isolation, the final batch
        from each worker could be a different size. For training this is worked around by the option above, for
        validation extra samples are inserted iff distributed mode is enabled so that the batches being reduced
        across replicas are of the same size. This will slightly alter the results, distributed validation will not be
        100% correct. This is similar to common handling in DistributedSampler for normal Datasets but a bit worse
        since there are up to N * J extra samples with IterableDatasets.
      * The sharding of the dataset (ie splitting into TFRecord files) imposes limitations on the number of
        replicas and dataloader workers you can use. For really small datasets that only contain a few shards
        you may have to train non-distributed w/ 1-2 dataloader workers. This is likely not a huge concern as the
        benefit of distributed training or fast dataloading should be much less for small datasets.
      * This wrapper is currently configured to return individual, decompressed image samples from the TFDS
        dataset. The augmentation (transforms) and batching are still done in PyTorch. It would be possible
        to specify TF augmentation fn and return augmented batches w/ some modifications to other downstream
        components.

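    Example (editor's illustrative sketch, not part of the original documentation; assumes the
    named TFDS dataset has already been downloaded/prepared under ``root``)::

        reader = ReaderTfds(name='oxford_iiit_pet', root='/data/tfds', split='test')
        for img, target in reader:  # img is a decoded PIL.Image, target is an integer label
            ...
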
    """

    def __init__(
            self,
            name,
            root=None,
            split='train',
            class_map=None,
            is_training=False,
            batch_size=1,
            download=False,
            repeats=0,
            seed=42,
            input_key='image',
            input_img_mode='RGB',
            target_key='label',
            target_img_mode='',
            prefetch_size=None,
            shuffle_size=None,
            max_threadpool_size=None,
    ):
        """ Tensorflow-datasets Wrapper

        Args:
            root: root data dir (ie your TFDS_DATA_DIR, not a dataset specific sub-dir)
            name: tfds dataset name (eg `imagenet2012`)
            split: tfds dataset split (can use all TFDS split strings eg `train[:10%]`)
            is_training: training mode, shuffle enabled, dataset len rounded by batch_size
            batch_size: batch_size to use to ensure total samples % batch_size == 0 in training across all distributed nodes
            download: download and build TFDS dataset if set, otherwise must use tfds CLI
            repeats: iterate through (repeat) the dataset this many times per iteration (once if 0 or 1)
            seed: common seed for shard shuffle across all distributed/worker instances
            input_key: name of Feature to return as data (input)
            input_img_mode: image mode if input is an image (currently PIL mode string)
            target_key: name of Feature to return as target (label)
            target_img_mode: image mode if target is an image (currently PIL mode string)
            prefetch_size: override default tf.data prefetch buffer size
            shuffle_size: override default tf.data shuffle buffer size
            max_threadpool_size: override default threadpool size for tf.data
        """
        super().__init__()
        self.root = root
        self.split = split
        self.is_training = is_training
        self.batch_size = batch_size
        self.repeats = repeats
        self.common_seed = seed  # a seed that's fixed across all worker / distributed instances

        # performance settings
        self.prefetch_size = prefetch_size or PREFETCH_SIZE
        self.shuffle_size = shuffle_size or SHUFFLE_SIZE
        self.max_threadpool_size = max_threadpool_size or MAX_TP_SIZE

        # TFDS builder and split information
        self.input_key = input_key
        self.input_img_mode = input_img_mode
        self.target_key = target_key
        self.target_img_mode = target_img_mode  # for dense pixel targets
        self.builder = tfds.builder(name, data_dir=root)
        # NOTE: the tfds command line app can be used to download & prepare datasets if the download flag is not set
        if download:
            self.builder.download_and_prepare()
        self.remap_class = False
        if class_map:
            self.class_to_idx = load_class_map(class_map)
            self.remap_class = True
        else:
            self.class_to_idx = get_class_labels(self.builder.info) if self.target_key == 'label' else {}
        self.split_info = self.builder.info.splits[split]
        self.num_samples = self.split_info.num_examples

        # Distributed world state
        self.dist_rank = 0
        self.dist_num_replicas = 1
        if dist.is_available() and dist.is_initialized() and dist.get_world_size() > 1:
            self.dist_rank = dist.get_rank()
            self.dist_num_replicas = dist.get_world_size()

        # Attributes that are updated in _lazy_init, including the tf.data pipeline itself
        self.global_num_workers = 1
        self.num_workers = 1
        self.worker_info = None
        self.worker_seed = 0  # seed unique to each worker instance
        self.subsplit = None  # set when data is distributed across workers using sub-splits
        self.ds = None  # initialized lazily on each dataloader worker process
        self.init_count = 0  # number of TF data pipeline initializations
        self.epoch_count = SharedCount()
        self.reinit_each_iter = self.is_training  # re-init the pipeline on each epoch when training

    def set_epoch(self, count):
        self.epoch_count.value = count

    def set_loader_cfg(
            self,
            num_workers: Optional[int] = None,
    ):
        if self.ds is not None:
            return
        if num_workers is not None:
            self.num_workers = num_workers
            self.global_num_workers = self.dist_num_replicas * self.num_workers

    def _lazy_init(self):
        """ Lazily initialize the dataset.

        This is necessary to init the Tensorflow dataset pipeline in the (dataloader) process that
        will be using the dataset instance. The __init__ method is called on the main process,
        this will be called in a dataloader worker process.

        NOTE: There will be problems if you try to re-use this dataset across different loader/worker
        instances once it has been initialized. Do not call any dataset methods that can call _lazy_init
        before it is passed to dataloader.
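
        Illustration (editor's note, not in the original docstring): with 2 distributed replicas and
        4 dataloader workers per replica, global_num_workers == 8, so each worker either reads 1/8th
        of the TFRecord shards via tf.distribute.InputContext (training, enough shards) or an even
        1/8th sub-split of the samples (validation, or datasets with too few shards).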
        """
        worker_info = torch.utils.data.get_worker_info()

        # setup input context to split dataset across distributed processes
        global_worker_id = 0
        if worker_info is not None:
            self.worker_info = worker_info
            self.worker_seed = worker_info.seed
            self.num_workers = worker_info.num_workers
            self.global_num_workers = self.dist_num_replicas * self.num_workers
            global_worker_id = self.dist_rank * self.num_workers + worker_info.id

            # Data sharding: InputContext assigns a subset of the underlying TFRecord files to each
            # 'pipeline'. It is used to assign shards across workers for training unless the dataset
            # has fewer shards than workers. Otherwise (and for validation, where samples cannot be
            # dropped), the fine-grained sub-split API is used for a more even split.
            should_subsplit = self.global_num_workers > 1 and (
                    self.split_info.num_shards < self.global_num_workers or not self.is_training)
            if should_subsplit:
                if has_buggy_even_splits:
                    # the even_split_indices workaround doesn't work on subsplits, upgrade tfds!
                    if not isinstance(self.split_info, tfds.core.splits.SubSplitInfo):
                        subsplits = even_split_indices(self.split, self.global_num_workers, self.num_samples)
                        self.subsplit = subsplits[global_worker_id]
                else:
                    subsplits = tfds.even_splits(self.split, self.global_num_workers)
                    self.subsplit = subsplits[global_worker_id]

        input_context = None
        if self.global_num_workers > 1 and self.subsplit is None:
            # set input context to divide shards among distributed replicas
            input_context = tf.distribute.InputContext(
                num_input_pipelines=self.global_num_workers,
                input_pipeline_id=global_worker_id,
                num_replicas_in_sync=self.dist_num_replicas,
            )
        read_config = tfds.ReadConfig(
            shuffle_seed=self.common_seed + self.epoch_count.value,
            shuffle_reshuffle_each_iteration=True,
            input_context=input_context,
        )
        ds = self.builder.as_dataset(
            split=self.subsplit or self.split,
            shuffle_files=self.is_training,
            decoders=dict(image=decode_example(channels=1 if self.input_img_mode == 'L' else 3)),
            read_config=read_config,
        )
        # avoid overloading threading w/ combo of TF ds threads + PyTorch workers
        options = tf.data.Options()
        thread_member = 'threading' if hasattr(options, 'threading') else 'experimental_threading'
        getattr(options, thread_member).private_threadpool_size = max(1, self.max_threadpool_size // self.num_workers)
        getattr(options, thread_member).max_intra_op_parallelism = 1
        ds = ds.with_options(options)
        if self.is_training or self.repeats > 1:
            # to prevent excessive drop_last batch behaviour w/ IterableDatasets
            # see warnings at https://pytorch.org/docs/stable/data.html#multi-process-data-loading
            ds = ds.repeat()  # allow wrap around and break iteration manually
        if self.is_training:
            ds = ds.shuffle(min(self.num_samples, self.shuffle_size) // self.global_num_workers, seed=self.worker_seed)
        ds = ds.prefetch(min(self.num_samples // self.global_num_workers, self.prefetch_size))
        self.ds = tfds.as_numpy(ds)
        self.init_count += 1

    def _num_samples_per_worker(self):
        num_worker_samples = \
            max(1, self.repeats) * self.num_samples / max(self.global_num_workers, self.dist_num_replicas)
        if self.is_training or self.dist_num_replicas > 1:
            num_worker_samples = math.ceil(num_worker_samples)
        if self.is_training:
            num_worker_samples = math.ceil(num_worker_samples / self.batch_size) * self.batch_size
        return int(num_worker_samples)

    def __iter__(self):
        if self.ds is None or self.reinit_each_iter:
            self._lazy_init()

        # Compute a rounded-up sample count that is used to:
        #   1. make batches even across workers & replicas in distributed validation.
        #      This adds extra samples and will slightly alter validation results.
        #   2. determine the loop ending condition in training w/ repeat enabled so that only full
        #      batch_size batches are produced (underlying tfds iter wraps around)
        target_sample_count = self._num_samples_per_worker()

        # Iterate until exhausted or sample count hits target when training (ds.repeat enabled)
        sample_count = 0
        for sample in self.ds:
            input_data = sample[self.input_key]
            if self.input_img_mode:
                if self.input_img_mode == 'L' and input_data.ndim == 3:
                    input_data = input_data[:, :, 0]
                input_data = Image.fromarray(input_data, mode=self.input_img_mode)
            target_data = sample[self.target_key]
            if self.target_img_mode:
                # dense pixel targets
                target_data = Image.fromarray(target_data, mode=self.target_img_mode)
            elif self.remap_class:
                target_data = self.class_to_idx[target_data]
            yield input_data, target_data
            sample_count += 1
            if self.is_training and sample_count >= target_sample_count:
                # Need to break out of loop when repeat() is enabled for training w/ oversampling.
                # This results in extra samples per epoch but seems more desirable than dropping
                # up to N*J batches per epoch (where N = num distributed processes, J = num worker processes)
                break

        # Pad across distributed nodes (make counts equal by adding extra samples)
        if not self.is_training and self.dist_num_replicas > 1 and self.subsplit is not None and \
                0 < sample_count < target_sample_count:
            # Validation batch padding is only done for distributed training where results are reduced
            # across nodes. For the single process case, it won't matter if workers return different
            # batch sizes. If using input_context or % based splits, sample count can vary significantly
            # across workers, so this approach is disabled when self.subsplit isn't set.
            while sample_count < target_sample_count:
                yield input_data, target_data  # yield prev sample again
                sample_count += 1

    def __len__(self):
        num_samples = self._num_samples_per_worker() * self.num_workers
        return num_samples

    def _filename(self, index, basename=False, absolute=False):
        assert False, "Not supported"  # no random access to samples

    def filenames(self, basename=False, absolute=False):
        """ Return all filenames in dataset, overrides base"""
        if self.ds is None:
            self._lazy_init()
        names = []
        for sample in self.ds:
            if len(names) >= self.num_samples:
                break  # safety for ds.repeat() case
            if 'file_name' in sample:
                name = sample['file_name']
            elif 'filename' in sample:
                name = sample['filename']
            elif 'id' in sample:
                name = sample['id']
            else:
                assert False, "No supported name field present"
            names.append(name)
        return names