
    ڧgS                         d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZ ddlmZmZ ddlmZ ddlmZ  G d	 d
e          ZdS )    N)Path)AnyCallableDictListOptionalTupleUnion)Tensor   )find_classesmake_dataset)
VideoClips)VisionDatasetc            !       f    e Zd ZdZdZdddZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 d$dee	e
f         de	dededee         dededee         deee	ef                  dededededede	ddf  fdZedee	ef         fd            Zdee	         de	dededee         f
d Zdefd!Zd"edeeeef         fd#Z xZS )%HMDB51a  
    `HMDB51 <https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/>`_
    dataset.

    HMDB51 is an action recognition video dataset.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Internally, it uses a VideoClips object to handle clip creation.

    Args:
        root (str or ``pathlib.Path``): Root directory of the HMDB51 Dataset.
        annotation_path (str): Path to the folder containing the split files.
        frames_per_clip (int): Number of frames in a clip.
        step_between_clips (int): Number of frames between each clip.
        fold (int, optional): Which fold to use. Should be between 1 and 3.
        train (bool, optional): If ``True``, creates a dataset from the train split,
            otherwise from the ``test`` split.
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" (default) or "TCHW".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, H, W, C] or Tensor[T, C, H, W]): The `T` video frames
            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
              and `L` is the number of points
            - label (int): class of the video clip
    zJhttps://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rarzQhttps://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar 15e67781e70dcfbdce2d7dbb9b3344b5)urlmd5r      NTr   THWCrootannotation_pathframes_per_clipstep_between_clips
frame_ratefoldtrain	transform_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_audio_samplesoutput_formatreturnc                    t                                          |           |dvrt          d|           d}t          | j                  \  | _        }t          | j        ||          | _        d | j        D             }t          |||||	|
|||||          }|| _	        || _
        || _        |                     ||||          | _        |                    | j                  | _        || _        d S )N)r   r      z$fold should be between 1 and 3, got )avic                     g | ]\  }}|S  r,   ).0path_s      W/var/www/html/ai-engine/env/lib/python3.11/site-packages/torchvision/datasets/hmdb51.py
<listcomp>z#HMDB51.__init__.<locals>.<listcomp>[   s    :::	qt:::    )r!   r"   r#   r$   r%   r&   )super__init__
ValueErrorr   r   classesr   samplesr   full_video_clipsr   r   _select_foldindicessubsetvideo_clipsr   )selfr   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   
extensionsclass_to_idxvideo_pathsr<   	__class__s                       r0   r4   zHMDB51.__init__=   s   $ 	y  JDJJKKK
%1$)%<%<"l#I
 
 ;:T\::: !#%'!5)'
 
 
  !,	
((otUSS&--dl;;"r2   c                     | j         j        S N)r8   metadatar=   s    r0   rD   zHMDB51.metadatas   s    $--r2   
video_listannotations_dirc                 \   |r| j         n| j        }d| d}t          j                            ||          }t          j        |          }t                      }	|D ]}
t          |
          5 }|                                }d d d            n# 1 swxY w Y   |D ]C}|	                                \  }}t          |          }||k    r|	                    |           Dg }t          |          D ];\  }}t          j                            |          |	v r|                    |           <|S )Nz*test_splitz.txt)	TRAIN_TAGTEST_TAGosr.   joinglobsetopen	readlinessplitintadd	enumeratebasenameappend)r=   rF   rG   r   r   
target_tagsplit_pattern_namesplit_pattern_pathannotation_pathsselected_filesfilepathfidlineslinevideo_filename
tag_stringtagr:   video_index
video_paths                       r0   r9   zHMDB51._select_foldw   sw   ',?T^^$-
54555W\\/;MNN9%788( 	7 	7Hh (3( ( ( ( ( ( ( ( ( ( ( ( ( ( ( 7 7-1ZZ\\*
*oo*$$"&&~666	7 '0'<'< 	, 	,#Kw
++~=={+++s   ,BB	B	c                 4    | j                                         S rC   )r<   	num_clipsrE   s    r0   __len__zHMDB51.__len__   s    ))+++r2   idxc                     | j                             |          \  }}}}| j        |         }| j        |         \  }}| j        |                     |          }|||fS rC   )r<   get_clipr:   r7   r   )r=   rh   videoaudior/   	video_idxsample_indexclass_indexs           r0   __getitem__zHMDB51.__getitem__   sc    %)%5%>%>s%C%C"ua|I.l3;>%NN5))Ee[((r2   )r   Nr   TNNr   r   r   r   r   r   )__name__
__module____qualname____doc__data_urlsplitsrI   rJ   r
   strr   rR   r   boolr   r   r   r4   propertyrD   r   r9   rg   r	   r   rp   __classcell__)rA   s   @r0   r   r      s       % %N \Hb1 F IH #$$((,:>$%#!4# 4#CI4# 4# 	4#
  4# SM4# 4# 4# H%4#  (S#X74# 4# 4# 4# "4# 4#  !4#" 
#4# 4# 4# 4# 4# 4#l .$sCx. . . . X.tCy 3 c Z^ cghkcl    ,, , , , ,)s )uVVS-@'A ) ) ) ) ) ) ) )r2   r   )rM   rK   pathlibr   typingr   r   r   r   r   r	   r
   torchr   folderr   r   video_utilsr   visionr   r   r,   r2   r0   <module>r      s     				       D D D D D D D D D D D D D D D D D D       . . . . . . . . # # # # # # ! ! ! ! ! !K) K) K) K) K)] K) K) K) K) K)r2   