
    ڧg                         d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlmZ ddlmZmZ ddlmZ ddlmZ  G d	 d
e          ZdS )    N)Path)AnyCallableDictListOptionalTupleUnion)Tensor   )find_classesmake_dataset)
VideoClips)VisionDatasetc            !       P    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 ddeeef         ded	ed
edee         dede	dee
         deeeef                  dededededededdf  fdZedeeef         fd            Zdee         dedede	dee         f
dZdefdZdedeeeef         fdZ xZS )UCF101a  
    `UCF101 <https://www.crcv.ucf.edu/data/UCF101.php>`_ dataset.

    UCF101 is an action recognition video dataset.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``. The dataset itself can be downloaded from the dataset website;
    annotations that ``annotation_path`` should be pointing to can be downloaded from `here
    <https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip>`_.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Internally, it uses a VideoClips object to handle clip creation.

    Args:
        root (str or ``pathlib.Path``): Root directory of the UCF101 Dataset.
        annotation_path (str): path to the folder containing the split files;
            see docstring above for download instructions of these files
        frames_per_clip (int): number of frames in a clip.
        step_between_clips (int, optional): number of frames between each clip.
        fold (int, optional): which fold to use. Should be between 1 and 3.
        train (bool, optional): if ``True``, creates a dataset from the train split,
            otherwise from the ``test`` split.
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" (default) or "TCHW".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, H, W, C] or Tensor[T, C, H, W]): The `T` video frames
            -  audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
               and `L` is the number of points
            - label (int): class of the video clip
    r   NTr   THWCrootannotation_pathframes_per_clipstep_between_clips
frame_ratefoldtrain	transform_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_audio_samplesoutput_formatreturnc                    t                                          |           d|cxk    rdk    sn t          d|           d}|| _        || _        t          | j                  \  | _        }t          | j        ||d           | _	        d | j	        D             }t          |||||	|
|||||          }|| _        |                     ||||          | _        |                    | j                  | _        || _        d S )Nr      z$fold should be between 1 and 3, got )avi)is_valid_filec                     g | ]
}|d          S )r    .0xs     W/var/www/html/ai-engine/env/lib/python3.11/site-packages/torchvision/datasets/ucf101.py
<listcomp>z#UCF101.__init__.<locals>.<listcomp>R   s    111qad111    )r   r   r   r    r!   r"   )super__init__
ValueErrorr   r   r   r   classesr   samplesr   full_video_clips_select_foldindicessubsetvideo_clipsr   )selfr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   
extensionsclass_to_idx
video_listr9   	__class__s                       r-   r1   zUCF101.__init__6   s!   $ 	D~~~~A~~~~JDJJKKK
	
%1$)%<%<"l#DI|ZW[\\\11DL111
 !#%'!5)'
 
 
  !,((_dERR&--dl;;"r/   c                     | j         j        S N)r5   metadatar:   s    r-   rA   zUCF101.metadatah   s    $--r/   r=   c                    
 |rdnd}| d|dd}t           j                            ||          }t                      
t	          |          5 }|                                }d |D             } fd|D             }
                    |           d d d            n# 1 swxY w Y   
fdt          t                              D             }	|	S )	Nr   testlist02dz.txtc                 h    g | ]/}|                                                     d           d         0S ) r   )stripsplitr*   s     r-   r.   z'UCF101._select_fold.<locals>.<listcomp>s   s1    :::AGGIIOOC((+:::r/   c                 n    g | ]1}t          j        j        j        g|                    d           R  2S )/)ospathjoinr   rJ   )r+   r,   r:   s     r-   r.   z'UCF101._select_fold.<locals>.<listcomp>t   s9    IIIqBGL:QWWS\\:::IIIr/   c                 (    g | ]}|         v |S r)   r)   )r+   iselected_filesr=   s     r-   r.   z'UCF101._select_fold.<locals>.<listcomp>v   s'    XXX
18W8W18W8W8Wr/   )	rM   rN   rO   setopen	readlinesupdaterangelen)r:   r=   r   r   r   nameffiddatar7   rR   s   ``        @r-   r6   zUCF101._select_foldl   s'   +wwV**D****GLL$//!WW 	(==??D::T:::DIIIIDIIID!!$'''		( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(
 YXXXXeC
OO44XXXs   AB  B$'B$c                 4    | j                                         S r@   )r9   	num_clipsrB   s    r-   __len__zUCF101.__len__y   s    ))+++r/   idxc                     | j                             |          \  }}}}| j        | j        |                  d         }| j        |                     |          }|||fS )Nr   )r9   get_clipr4   r7   r   )r:   r`   videoaudioinfo	video_idxlabels          r-   __getitem__zUCF101.__getitem__|   s_    (,(8(A(A#(F(F%udIT\)45a8>%NN5))EeU""r/   )r   Nr   TNNr   r   r   r   r   r   )__name__
__module____qualname____doc__r
   strr   intr   boolr   r   r   r1   propertyrA   r   r6   r_   r	   r   rh   __classcell__)r>   s   @r-   r   r      s       ' '\ #$$((,:>$%#!0# 0#CI0# 0# 	0#
  0# SM0# 0# 0# H%0#  (S#X70# 0# 0# 0# "0# 0#  !0#" 
#0# 0# 0# 0# 0# 0#d .$sCx. . . . X.tCy 3 c Z^ cghkcl    , , , , ,#s #uVVS-@'A # # # # # # # #r/   r   )rM   pathlibr   typingr   r   r   r   r   r	   r
   torchr   folderr   r   video_utilsr   visionr   r   r)   r/   r-   <module>rx      s    				       D D D D D D D D D D D D D D D D D D       . . . . . . . . # # # # # # ! ! ! ! ! !w# w# w# w# w#] w# w# w# w# w#r/   