
    g&(                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZmZmZ dd	lmZ d
dlmZ  ej        e          Z G d de
d          Z G d de          ZdS )zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenBatchEncodingPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       ,    e Zd Zdddddddddd	i dZdS )InstructBlipProcessorKwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargsN)__name__
__module____qualname__	_defaults     t/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   &   sF         #').*/&+%*"

 

  IIIr$   r   F)totalc            
            e Zd ZdZg dZdgZdZdZdZd fd	Z		 	 	 	 dde
d	eeeee         ee         f         d
ee         defdZd Zd Zed             Z fdZe fd            Z xZS )InstructBlipProcessora  
    Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

    Args:
        image_processor (`BlipImageProcessor`):
            An instance of [`BlipImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):"
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizernum_query_tokensBlipImageProcessorr   Nc                     t          ddd          | _        |                    | j        gd           || _        t	                                          |||           d S )Nz<image>FT)
normalizedspecial)special_tokens)r   image_token
add_tokensr,   super__init__)selfr)   r*   r+   r,   kwargs	__class__s         r%   r5   zInstructBlipProcessor.__init__P   sd    %iE4PPPd./EEE 0)5FGGGGGr$   imagestextr7   returnc                    ||t          d           | j        t          fd| j        j        i|}t                      }|t          |t                    r|g}n?t          |t                    s*t          |d         t                    st          d          |d         	                    dd          } | j        |fi |d         ddi}	||d         d<   | j
        p|ni }
| j        j        | j
        z  }|                     |gt          |          z  dd	          }|	D ]+}d
 t          ||         |	|                   D             |
|<   ,n|	}
|t                              d           t#          |
|          }
|                    |
            | j        |fi |d         }|	                    d          |d<   |	                    d          |d<   |) | j        |fi |d         }|                    |           |S )a  
        This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Args:
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`TextInput`, `PreTokenizedInput`, `List[TextInput]`, `List[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
        Nz,You have to specify at least images or text.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr   return_tensorsF)r   r>   c                     g | ]
\  }}||z   S r#   r#   ).0img_encodingtxt_encodings      r%   
<listcomp>z2InstructBlipProcessor.__call__.<locals>.<listcomp>   s1     ( ( (6L, %|3( ( (r$   aA  Expanding inputs for image tokens in InstructBLIP should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_maskr   )
ValueError_merge_kwargsr   r*   init_kwargsr   
isinstancestrlistpopr,   r2   contentlenziploggerwarning_oncer   updater+   r)   )r6   r9   r:   audiovideosr7   output_kwargsencodingr>   _text_encodingtext_encodingimage_tokensimage_token_encodingkqformer_text_encodingimage_encodings                   r%   __call__zInstructBlipProcessor.__call__V   s   , >dlKLLL**'
 
"&."<
 
 
  >>$$$ fvd++ fJtAw4L4L f !deee +=9==>NPTUUN+T^DffM-4PffaefffN=KM-()9: $0V5G "#/7$:OO'+~~!NSYY.5Y] (6 ( ($ (  A( (:=>RST>UWefgWh:i:i( ( (M!$$ !/%''B   *-^TTTMOOM***$:D$:4$`$`=Q^C_$`$`!,A,E,Ek,R,RH()1F1J1JK[1\1\H-.1T1&[[M/<Z[[NOON+++r$   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r*   batch_decoder6   argsr7   s      r%   rc   z"InstructBlipProcessor.batch_decode   s    
 +t~*D;F;;;r$   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r*   decoderd   s      r%   rg   zInstructBlipProcessor.decode   s    
 %t~$d5f555r$   c                     | j         j        }| j        j        }t          t                              ||z                       S N)r*   model_input_namesr)   rN   dictfromkeys)r6   tokenizer_input_namesimage_processor_input_namess      r%   rj   z'InstructBlipProcessor.model_input_names   s<     !% @&*&:&L#DMM"7:U"UVVWWWr$   c                    t           j                            |          rt          d| d          t          j        |d           t           j                            |d          }| j                            |           d| j        v }|r| j        	                    d            t                      j        |fi |}|r| xj        dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr+   )ospathisfilerI   makedirsjoinr+   save_pretrained
attributesremover4   )r6   save_directoryr7   qformer_tokenizer_pathqformer_presentoutputsr8   s         r%   rv   z%InstructBlipProcessor.save_pretrained   s    7>>.)) 	db~bbbccc
NT2222!#n>Q!R!R../EFFF .@ 	8O""#6777)%'').CCFCC 	5OO 344OOr$   c                      t                      j        |fi |}t          |t                    r|d         }t	          j        |d          }||_        |S )Nr   r+   )	subfolder)r4   from_pretrainedrL   tupler   r+   )clspretrained_model_name_or_pathr7   	processorr+   r8   s        r%   r   z%InstructBlipProcessor.from_pretrained   se    +EGG+,ITTVTT	 i'' 	%!!I)9:Wcvwww&7	#r$   ri   )NNNN)r   r    r!   __doc__rw   valid_kwargsimage_processor_classtokenizer_classqformer_tokenizer_classr5   r   r   r   r   r   r
   r   r   ra   rc   rg   propertyrj   rv   classmethodr   __classcell__)r8   s   @r%   r(   r(   7   sd        $ GFFJ&'L0%O-H H H H H H "^bM MM I0$y/4HYCZZ[M 45M 
M M M M`< < <6 6 6 X X XX    &     [    r$   r(   )r   rq   typingr   r   image_processing_utilsr   image_utilsr   processing_utilsr   r	   r
   tokenization_utils_baser   r   r   r   utilsr   autor   
get_loggerr   rS   r   r(   r#   r$   r%   <module>r      sZ    
			         2 2 2 2 2 2 % % % % % % H H H H H H H H H H                              
	H	%	%    "2%    "b b b b bN b b b b br$   