
    ڧg=C                     8   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZmZmZmZ ddlmZ  ed          Zej        fded	ed
ededef
dZddej        dedededej        f
dZ G d d          ZdedefdZ G d d          ZdS )    NFraction)	AnyCallablecastDictListOptionalTupleTypeVarUnion)_probe_video_from_file_read_video_from_file
read_videoread_video_timestamps   )tqdmTptstimebase_fromtimebase_to
round_funcreturnc                 D    t          | d          |z  |z  } ||          S )zconvert pts between different time bases
    Args:
        pts: presentation timestamp, float
        timebase_from: original timebase. Fraction
        timebase_to: new timebase. Fraction
        round_func: rounding function.
    r   r   )r   r   r   r   new_ptss        \/var/www/html/ai-engine/env/lib/python3.11/site-packages/torchvision/datasets/video_utils.pypts_convertr      s-     sA.<G:g    tensorsizestepdilationc                 X   |                                  dk    r$t          d|                                             |                     d          }|                                 }||z  ||z  f}|||dz
  z  dz   z
  |z  dz   |f}|d         dk     rd|f}t	          j        | ||          S )a   
    similar to tensor.unfold, but with the dilation
    and specialized for 1d tensors

    Returns all consecutive windows of `size` elements, with
    `step` between windows. The distance between each element
    in a window is given by `dilation`.
    r   z*tensor should have 1 dimension instead of r   )dim
ValueErrorstridenumeltorch
as_strided)r   r    r!   r"   o_strider'   
new_stridenew_sizes           r   unfoldr-      s     zz||qTfjjllTTUUU}}QHLLNNE/8h#67J(dQh/!34=A4HH{Qt9FHj999r   c                   r    e Zd ZdZdee         ddfdZdefdZdede	ee         e
e         f         fdZdS )	_VideoTimestampsDatasetz
    Dataset used to parallelize the reading of the timestamps
    of a list of videos, given their paths in the filesystem.

    Used in VideoClips and defined at top level, so it can be
    pickled when forking.
    video_pathsr   Nc                     || _         d S Nr0   )selfr0   s     r   __init__z _VideoTimestampsDataset.__init__8   s    &r   c                 *    t          | j                  S r2   lenr0   r4   s    r   __len__z_VideoTimestampsDataset.__len__;       4#$$$r   idxc                 6    t          | j        |                   S r2   )r   r0   )r4   r<   s     r   __getitem__z#_VideoTimestampsDataset.__getitem__>   s    $T%5c%:;;;r   )__name__
__module____qualname____doc__r	   strr5   intr:   r   r
   floatr>    r   r   r/   r/   /   s         'DI '$ ' ' ' '% % % % %<s <uT#Y-G'H < < < < < <r   r/   xc                     | S )zH
    Dummy collate function to be used with _VideoTimestampsDataset
    rF   )rG   s    r   _collate_fnrI   B   s	     Hr   c                      e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d.dee         ded	ed
ee         dee	ee
f                  dededededededededdfdZd/dZde	ee
f         ddfdZede	ee
f         fd            Zdee         dd fdZe	 d0dej        dededee         d
ee         deej        eee         ej        f         f         fd             Zd0deded
ee         ddfd!Zdefd"Zdefd#Zdefd$Zd%edeeef         fd&Zeded'ed(edeeej        f         fd)            Zd%edeej        ej        e	ee
f         ef         fd*Zde	ee
f         fd+Zd,e	ee
f         ddfd-Z dS )1
VideoClipsaE  
    Given a list of video files, computes all consecutive subvideos of size
    `clip_length_in_frames`, where the distance between each subvideo in the
    same video is defined by `frames_between_clips`.
    If `frame_rate` is specified, it will also resample all the videos to have
    the same frame rate, and the clips will refer to this frame rate.

    Creating this instance the first time is time-consuming, as it needs to
    decode all the videos in `video_paths`. It is recommended that you
    cache the results after instantiation of the class.

    Recreating the clips for different clip lengths is fast, and can be done
    with the `compute_clips` method.

    Args:
        video_paths (List[str]): paths to the video files
        clip_length_in_frames (int): size of a clip in number of frames
        frames_between_clips (int): step (in frames) between each clip
        frame_rate (float, optional): if specified, it will resample the video
            so that it has `frame_rate`, and then the clips will be defined
            on the resampled video
        num_workers (int): how many subprocesses to use for data loading.
            0 means that the data will be loaded in the main process. (default: 0)
        output_format (str): The format of the output video tensors. Can be either "THWC" (default) or "TCHW".
       r   Nr   THWCr0   clip_length_in_framesframes_between_clips
frame_rate_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_video_max_dimension_audio_samples_audio_channelsoutput_formatr   c                 f   || _         || _        || _        || _        |	| _        |
| _        || _        || _        |                                | _	        | j	        dvrt          d| d          ||                                  n|                     |           |                     |||           d S )N)rM   TCHWz5output_format should be either 'THWC' or 'TCHW', got .)r0   rR   rS   rT   rU   rV   rW   rX   upperrY   r%   _compute_frame_pts_init_from_metadatacompute_clips)r4   r0   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   s                 r   r5   zVideoClips.__init__d   s    " '& )*$8!$8!,.*0022%555eUbeeefff (##%%%%$$%:;;;02F
SSSSSr   c                    g | _         g | _        dd lj        j                            t          | j                  d| j        t                    }t          t          |                    5 }|D ]r}|                    d           t          t          |           \  }}fd|D             }| j                             |           | j                            |           s	 d d d            d S # 1 swxY w Y   d S )Nr   rL   )
batch_sizerR   
collate_fn)totalr   c                 >    g | ]} j         |j                   S )dtype)	as_tensorlong).0r   r(   s     r   
<listcomp>z1VideoClips._compute_frame_pts.<locals>.<listcomp>   s,    YYY_U_S
CCCYYYr   )	video_pts	video_fpstorch.utils.datautilsdata
DataLoaderr/   r0   rR   rI   r   r8   updatelistzipextend)r4   dlpbarbatch	batch_pts	batch_fpsr(   s         @r   r^   zVideoClips._compute_frame_pts   sW   &( 	 */+*:*E*E#D$455("	 +F +
 +
 B    		1D 1 1A'+CK'8'8$	9 ZYYYyYYY	%%i000%%i00001		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1 		1s   0A6C44C8;C8metadatac                    |d         | _         t          | j                   t          |d                   k    sJ |d         | _        t          | j                   t          |d                   k    sJ |d         | _        d S )Nr0   rl   rm   )r0   r8   rl   rm   )r4   r{   s     r   r_   zVideoClips._init_from_metadata   s}    #M24#$$H[,A(B(BBBBB!+.4#$$H[,A(B(BBBBB!+.r   c                 0    | j         | j        | j        d}|S )Nr0   rl   rm   r~   )r4   	_metadatas     r   r{   zVideoClips.metadata   s(      +
 
	
 r   indicesc                      fd|D             } fd|D             } fd|D             }|||d} t                     | j         j         j        | j         j         j         j         j         j	         j
         j                  S )Nc                 *    g | ]}j         |         S rF   r3   rj   ir4   s     r   rk   z%VideoClips.subset.<locals>.<listcomp>   s!    <<<qt'*<<<r   c                 *    g | ]}j         |         S rF   )rl   r   s     r   rk   z%VideoClips.subset.<locals>.<listcomp>        8881T^A&888r   c                 *    g | ]}j         |         S rF   )rm   r   s     r   rk   z%VideoClips.subset.<locals>.<listcomp>   r   r   r~   )rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   )type
num_framesr!   rP   rR   rS   rT   rU   rV   rW   rX   rY   )r4   r   r0   rl   rm   r{   s   `     r   subsetzVideoClips.subset   s    <<<<G<<<8888888	8888888	&""
 

 tDzz"&/!%"*(*,!%!:!%!:. 0,
 
 
 	
r   rl   r   r!   fpsc                    |d}||}t          |           |z  |z  }t                              t          t	          j        |                    ||          }| |         } t          | ||          }|                                st          j	        d           t          |t                    r|gt          |          z  }nt          |||          }||fS )Nr   zThere aren't enough frames in the current video to get a clip for the given clip length and frames between clips. The video (and potentially others) will be skipped.)r8   rK   _resample_video_idxrD   mathfloorr-   r'   warningswarn
isinstanceslice)	rl   r   r!   r   rP   total_frames_idxsclipsidxss	            r   compute_clips_for_videoz"VideoClips.compute_clips_for_video   s     ; CJ9~~
2S8..s4:l3K3K/L/LcS]^^e$	y*d33{{}} 	M\  
 eU## 	37SZZ'DD%T22Dd{r   c                    || _         || _        || _        g | _        g | _        t          | j        | j                  D ]U\  }}|                     |||||          \  }}| j        	                    |           | j        	                    |           Vt          j        d | j        D                       }|                    d                                          | _        dS )a  
        Compute all consecutive sequences of clips from video_pts.
        Always returns clips of size `num_frames`, meaning that the
        last few frames in a video can potentially be dropped.

        Args:
            num_frames (int): number of frames for the clip
            step (int): distance between two clips
            frame_rate (int, optional): The frame rate
        c                 ,    g | ]}t          |          S rF   r8   rj   vs     r   rk   z,VideoClips.compute_clips.<locals>.<listcomp>   s    'C'C'C1A'C'C'Cr   r   N)r   r!   rP   r   resampling_idxsrt   rl   rm   r   appendr(   rh   cumsumtolistcumulative_sizes)	r4   r   r!   rP   rl   r   r   r   clip_lengthss	            r   r`   zVideoClips.compute_clips   s     %	$
!!$.$.AA 	. 	.NIs66y*dTWYcddKE4Je$$$ ''----'C'C
'C'C'CDD , 3 3A 6 6 = = ? ?r   c                 *    |                                  S r2   )	num_clipsr9   s    r   r:   zVideoClips.__len__   s    ~~r   c                 *    t          | j                  S r2   r7   r9   s    r   
num_videoszVideoClips.num_videos   r;   r   c                     | j         d         S )zJ
        Number of subclips that are available in the video list.
        )r   r9   s    r   r   zVideoClips.num_clips  s     $R((r   r<   c                 v    t          j        | j        |          }|dk    r|}n|| j        |dz
           z
  }||fS )zw
        Converts a flattened representation of the indices into a video_idx, clip_idx
        representation.
        r   r   )bisectbisect_rightr   )r4   r<   	video_idxclip_idxs       r   get_clip_locationzVideoClips.get_clip_location	  sJ    
 '(=sCC	>>HHT29q=AAH(""r   original_fpsnew_fpsc                     ||z  }|                                 r t          |          }t          d d |          S t          j        | t          j                  |z  }|                                                    t          j                  }|S )Nrf   )	
is_integerrD   r   r(   arangefloat32r   toint64)r   r   r   r!   r   s        r   r   zVideoClips._resample_video_idx  sx    g%?? 	+ t99DtT***|Jem<<<tCzz||u{++r   c                    ||                                  k    r(t          d| d|                                   d          |                     |          \  }}| j        |         }| j        |         |         }ddlm}  |            }|dk    r| j        dk    rt          d          | j	        dk    rt          d          | j
        dk    rt          d	          | j        dk    rt          d
          | j        dk    rt          d          |dk    rK|d                                         }|d                                         }	t          |||	          \  }
}}n`t          |          }|j        }d}t#          t$          |d                                                   }t#          t$          |d                                                   }d\  }}t'          dd          }t'          |j        j        |j        j                  }|j        rct'          |j        j        |j        j                  }t3          |||t4          j                  }t3          |||t4          j                  }|j        }t=          || j        | j	        | j
        | j        ||f|| j        | j        ||f|          \  }
}}d|i}|||d<   | j         J| j!        |         |         }tE          |tF          j$                  r||d         z
  }|
|         }
| j         |d<   tK          |
          | j&        k    sJ |
j'         d| j&                     | j(        dk    r|
)                    dddd          }
|
|||fS )a7  
        Gets a subclip from a list of videos.

        Args:
            idx (int): index of the subclip. Must be between 0 and num_clips().

        Returns:
            video (Tensor)
            audio (Tensor)
            info (Dict)
            video_idx (int): index of the video in `video_paths`
        zIndex z out of range (z number of clips)r   )get_video_backendpyavz.pyav backend doesn't support _video_width != 0z/pyav backend doesn't support _video_height != 0z6pyav backend doesn't support _video_min_dimension != 0z6pyav backend doesn't support _video_max_dimension != 0z0pyav backend doesn't support _audio_samples != 0r   N)r   r   r   )
video_widthvideo_heightvideo_min_dimensionvideo_max_dimensionvideo_pts_rangevideo_timebaseaudio_samplesaudio_channelsaudio_pts_rangeaudio_timebaserm   	audio_fpsz x r[         )*r   
IndexErrorr   r0   r   torchvisionr   rS   r%   rT   rU   rV   rW   itemr   r   rm   r   rD   r   r   	numeratordenominator	has_audior   r   r   r   ceilaudio_sample_rater   rX   rP   r   r   r(   Tensorr8   r   shaperY   permute)r4   r<   r   r   
video_pathclip_ptsr   backend	start_ptsend_ptsvideoaudioinfo_inform   r   video_start_ptsvideo_end_ptsaudio_start_ptsaudio_end_ptsr   r   _resampling_idxs                           r   get_clipzVideoClips.get_clip!  s    $..""""]c]]$..:J:J]]]^^^"44S99	8%i0
:i(2111111##%%f A%% !QRRR!Q&& !RSSS(A-- !YZZZ(A-- !YZZZ"a'' !STTTf ((**Irl''))G!+J	7!K!KE5$$*:66EII"3(8(8(:(:;;O hrl&7&7&9&9::M-2*O]%a^^N%e&:&DeFZFfggN 4!)%*>*H%J^Jj!k!k"-o~~_c_i"j"j +M>>[_[d e e!3	3 -!/$($=$($=!0- @-"1#3!0- @-  OE5!  +D$$-[!?&!1)<XFN.%,77 D!/.2C!C.)E $D5zzT_,,,.R.R.R.R,,,''MM!Q1--EeT9,,r   c                     d | j         D             }d | j         D             }|r(t          j        |          }|                                }| j                                        }||d<   ||d<   |d= |d= |d= d|d	<   |S )
Nc                 ,    g | ]}t          |          S rF   r   r   s     r   rk   z+VideoClips.__getstate__.<locals>.<listcomp>z  s    :::a3q66:::r   c                 L    g | ]!}|                     t          j                  "S rF   )r   r(   r   )rj   rG   s     r   rk   z+VideoClips.__getstate__.<locals>.<listcomp>  s&    ???1QTT%+&&???r   video_pts_sizesrl   r   r   r   r   _version)rl   r(   catnumpy__dict__copy)r4   r   rl   ds       r   __getstate__zVideoClips.__getstate__y  s    ::4>:::
 @????	 	*	),,I "))I M  .
"+ gJ  ! *r   r   c                    d|vr	|| _         d S t          j        |d         t          j                  }t          j        ||d         d          }|d= ||d<   || _         |                     | j        | j        | j                   d S )Nr   rl   rf   r   r   )r$   )	r   r(   rh   r   splitr`   r   r!   rP   )r4   r   rl   s      r   __setstate__zVideoClips.__setstate__  s    QDMFOAkN%+FFF	K	1->+?QGGG	 "+4?DItGGGGGr   )rL   r   NNr   r   r   r   r   r   r   rM   )r   Nr2   )!r?   r@   rA   rB   r	   rC   rD   r
   rE   r   r   r5   r^   r_   propertyr{   r   staticmethodr(   r   r   r   r   r   r`   r:   r   r   r   r   r   r   r   rF   r   r   rK   rK   I   s        : &($%&*:>$%$% ##T #T#Y#T  ##T "	#T
 UO#T  (S#X7#T #T #T #T "#T "#T #T #T #T 
#T #T #T #TJ1 1 1 14/DcN /t / / / / $sCx.    X
d3i 
L 
 
 
 
2 qu <-08;BJ5/_ghm_n	u|U4;#<==	>   \2@ @ @3 @HUO @_c @ @ @ @.         %C % % % %)3 ) ) ) )
#S 
#U38_ 
# 
# 
# 
# 	 	5 	5 	UZ[`bgbn[nUo 	 	 	 \	V-C V-E%,d3PS8nVY*Y$Z V- V- V- V-pd38n    8Hd38n H H H H H H Hr   rK   )r   ) r   r   r   	fractionsr   typingr   r   r   r   r	   r
   r   r   r   r(   torchvision.ior   r   r   r   ro   r   r   r   rD   r   r   r-   r/   rI   rK   rF   r   r   <module>r      s            S S S S S S S S S S S S S S S S S S S S S S  k k k k k k k k k k k k      GCLL bfak 	 	S 	 	 	V^ 	ps 	 	 	 	: :5< :s :# : :U\ : : : :(< < < < < < < <&1     ZH ZH ZH ZH ZH ZH ZH ZH ZH ZHr   