
    g*                         d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZ dd	lmZmZ d
dlmZ  ej        e          Z G d de          ZdS )zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListOptionalUnion   )BatchFeature)
VideoInput)ProcessorMixin)
AddedTokenBatchEncodingPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypelogging   )AutoTokenizerc            $           e Zd ZdZg dZdgZdZdZdZd" fd	Z		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d#de
deeeee         ee         f         dedeeeef         deeeef         dee         dedee         dee         dededededededeeeef                  def"dZd Zd Zed             Z fd Ze fd!            Z xZS )$InstructBlipVideoProcessora  
    Constructs an InstructBLIPVideo processor which wraps a InstructBLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipVideoProcessor`] offers all the functionalities of [`InstructBlipVideoImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~InstructBlipVideoProcessor.__call__`] and [`~InstructBlipVideoProcessor.decode`] for more information.

    Args:
        image_processor (`InstructBlipVideoImageProcessor`):
            An instance of [`InstructBlipVideoImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizernum_query_tokensInstructBlipVideoImageProcessorr   Nc                     t          ddd          | _        |                    | j        gd           || _        t	                                          |||           d S )Nz<video>FT)
normalizedspecial)special_tokens)r
   video_token
add_tokensr   super__init__)selfr   r   r   r   kwargs	__class__s         ~/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/instructblipvideo/processing_instructblipvideo.pyr"   z#InstructBlipVideoProcessor.__init__A   sd    %iE4PPPd./EEE 0)5FGGGGG    TFr   imagestextadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbosereturn_tensorsreturnc                    ||t          d          t                      }|t          |t                    r|g}n?t          |t                    s*t          |d         t                    st          d           | j        d||||||||	|
|||||dd|}| j        s|qi }| j        j        | j        z  dz  }|                     |gt          |          z  dd          }|D ]+}d	 t          ||         ||                   D             ||<   ,n|}|t                              d
           t          ||          }|                    |            | j        d||||||||	|
||||||d|}|                    d          |d<   |                    d          |d<   |,|                     ||          }|                    |           |S )a%  
        This method uses [`InstructBlipVideoImageProcessor.__call__`] method to prepare image(s) or video(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Nz3You have to specify at least one of images or text.r   zAInvalid input text. Please provide a string, or a list of strings)r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7      F)r*   r7   c                     g | ]
\  }}||z   S  r<   ).0img_encodingtxt_encodings      r&   
<listcomp>z7InstructBlipVideoProcessor.__call__.<locals>.<listcomp>   s1     ( ( (6L, %|3( ( (r'   aK  Expanding inputs for video tokens in InstructBLIPVideo should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/65f22892b054dc0d68228af56fbeaac2) to update your InstructBLIPVideo model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_mask)r7   r<   )
ValueErrorr   
isinstancestrlistr   r   r   contentlenziploggerwarning_oncer   updater   popr   )r#   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r$   encoding_text_encodingtext_encodingvideo_tokensvideo_token_encodingkqformer_text_encodingimage_encodings                             r&   __call__z#InstructBlipVideoProcessor.__call__G   s   4 >dlRSSS>>$$$ fvd++ fJtAw4L4L f !deee+T^ #5%%#5&;*C+E'=&;+#   ! N* $0V5G "$,t/DDqH  (,~~!NSYY.5Y] (6 ( ($ (  A( (:=>RST>UWefgWh:i:i( ( (M!$$ !/%''B   *-^TTTMOOM***$:D$: %#5%%#5&;*C+E'=&;+-% %  !% %!$ -B,E,Ek,R,RH()1F1J1JK[1\1\H-.!11&1XXNOON+++r'   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r   batch_decoder#   argsr$   s      r&   r[   z'InstructBlipVideoProcessor.batch_decode   s    
 +t~*D;F;;;r'   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r   decoder\   s      r&   r_   z!InstructBlipVideoProcessor.decode   s    
 %t~$d5f555r'   c                     | j         j        }| j        j        }t          t                              ||z                       S N)r   model_input_namesr   rI   dictfromkeys)r#   tokenizer_input_namesimage_processor_input_namess      r&   rb   z,InstructBlipVideoProcessor.model_input_names   s<     !% @&*&:&L#DMM"7:U"UVVWWWr'   c                    t           j                            |          rt          d| d          t          j        |d           t           j                            |d          }| j                            |           d| j        v }|r| j        	                    d            t                      j        |fi |}|r| xj        dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr   )ospathisfilerF   makedirsjoinr   save_pretrained
attributesremover!   )r#   save_directoryr$   qformer_tokenizer_pathqformer_presentoutputsr%   s         r&   rn   z*InstructBlipVideoProcessor.save_pretrained   s    7>>.)) 	db~bbbccc
NT2222!#n>Q!R!R../EFFF .@ 	8O""#6777)%'').CCFCC 	5OO 344OOr'   c                      t                      j        |fi |}t          |t                    r|d         }t	          j        |d          }||_        |S )Nr   r   )	subfolder)r!   from_pretrainedrG   tupler   r   )clspretrained_model_name_or_pathr$   	processorr   r%   s        r&   rw   z*InstructBlipVideoProcessor.from_pretrained   se    +EGG+,ITTVTT	 i'' 	%!!I)9:Wcvwww&7	#r'   ra   )NNTFNNr   NNFFFFFTN) __name__
__module____qualname____doc__ro   valid_kwargsimage_processor_classtokenizer_classqformer_tokenizer_classr"   r   r   r   r   r   boolrH   r   r   r   intr   r   rY   r[   r_   propertyrb   rn   classmethodrw   __classcell__)r%   s   @r&   r   r   (   sD        $ GFFJ&'L=%O-H H H H H H "^b#'5:;?$(,004*/+0',&+#;?#l ll I0$y/4HYCZZ[l !	l
 tS/12l $%778l SMl l %SMl  (~l $(l %)l !%l  $l l  !l" !sJ!78#l& 
'l l l l^< < <6 6 6 X X XX    &     [    r'   r   )r   ri   typingr   r   r   image_processing_utilsr   image_utilsr   processing_utilsr	   tokenization_utils_baser
   r   r   r   r   r   utilsr   r   autor   
get_loggerr|   rM   r   r<   r'   r&   <module>r      s<    
			 ( ( ( ( ( ( ( ( ( ( 2 2 2 2 2 2 % % % % % % . . . . . .                ) ( ( ( ( ( ( (             
	H	%	%A A A A A A A A A Ar'   