
    gI-                        d Z ddlmZmZmZmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZmZ ddlmZ erdd	lmZ  ej        e          Zd
efdZd Z G d ded          Z G d ded          Z G d de          Z dS )z
Processor class for IDEFICS2.
    )TYPE_CHECKINGListOptionalUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ImagesKwargsProcessingKwargsProcessorMixinUnpack!_validate_images_text_input_order)
AddedToken	TextInput)logging)PreTokenizedInputreturnc                 V    t          | t                    o|                     d          S )Nhttp)
isinstancestr
startswith)vals    l/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/idefics2/processing_idefics2.pyis_urlr   )   s#    c3:CNN6$:$::    c                 >    t          |           pt          |           S N)r   r
   )elems    r   is_image_or_image_urlr"   -   s    $<</>$///r   c                   &    e Zd ZU ee         ed<   dS )Idefics2ImagesKwargsimage_seq_lenN)__name__
__module____qualname__r   int__annotations__ r   r   r$   r$   1   s"         C=     r   r$   F)totalc                   ,    e Zd ZU eed<   ddddi dZdS )Idefics2ProcessorKwargsimages_kwargsTF)add_special_tokenspaddingis_split_into_words)text_kwargsr/   N)r&   r'   r(   r$   r*   	_defaultsr+   r   r   r.   r.   5   sB         '''' #'#(
 

  IIIr   r.   c            
            e Zd ZdZddgZddgZdZdZddede	f fd
Z
d Z	 	 	 	 ddeeee         eee                  f         deedee         ed         f         dee         defdZd Zd Zed             Z xZS )Idefics2Processora  
    Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.

    [`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
    the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

    Args:
        image_processor (`Idefics2ImageProcessor`):
            An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
        tokenizer (`PreTrainedTokenizerBase`, *optional*):
            An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
        image_seq_len (`int`, *optional*, defaults to 64):
            The length of the image sequence i.e. the number of <image> tokens per image in the input.
            This parameter is used to build the string from the input prompt and image tokens and should match the
            config.perceiver_config.resampler_n_latents value for the model used.
        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
            in a chat into a tokenizable string.
    image_processor	tokenizerr%   chat_templateIdefics2ImageProcessorAutoTokenizerN@   c                    |t          d          |t          d          t          ddd          | _        t          ddd          | _        t          ddd          | _        || _        d	| j        | j        | j        gi}|                    |           t                                          |||
           d S )Nz)You need to specify an `image_processor`.z"You need to specify a `tokenizer`.z<fake_token_around_image>FT)
normalizedspecialz<image>z<end_of_utterance>additional_special_tokens)r9   )	
ValueErrorr   fake_image_tokenimage_tokenend_of_utterance_tokenr%   r0   super__init__)selfr7   r8   r%   r9   kwargstokens_to_add	__class__s          r   rF   zIdefics2Processor.__init__[   s    "HIIIABBB *+FSXbf g g g%iE4PPP&01ERWae&f&f&f#* ($*?AQSWSn)o
 	$$]333)=QQQQQr   c                     g }|D ]t}g }|D ]X}t          |          r|                    |           't          |          r"|                    t          |                     Y|                    |           u|S r    )r
   appendr   r   )rG   promptsprompt_imagespromptimagesr!   s         r   _extract_images_from_promptsz.Idefics2Processor._extract_images_from_promptsm   s     	) 	)FF 4 4!$'' 4MM$''''D\\ 4MM*T"2"2333  ((((r   rP   textr   rH   r   c                 R   ||t          d          t          ||          \  }} | j        t          fd| j        j        i|}|d                             dd          }||n| j        }g }t                      }	|*t          |t                    r|g}n?t          |t                    s*t          |d         t                    st          d          | j        j        }
| j        j        }|
 ||z   |
 }| j        j        r|dz  }g }|D ]p}|                    |                    |                     |                    ||          }|                    |
 |
 |
           }|                    |           q | j        |fi |d	         }|	                    |           |t+          |          r|gg}nt          |t                    rt+          |d                   r|g}nZt          |t                    sEt          |d         t                    s*t+          |d         d                   st          d
          d |D             }|||k    st          d| d| d          d |D             } | j        |fi |d         }|	                    |           |	S )a
  
        Processes the input prompts and returns a BatchEncoding.

        Example:

        ```python
        >>> import requests
        >>> from transformers import Idefics2Processor
        >>> from transformers.image_utils import load_image

        >>> processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b", image_seq_len=2)
        >>> processor.image_processor.do_image_splitting = False  # Force as False to simplify the example

        >>> url1 = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
        >>> url2 = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"

        >>> image1, image2 = load_image(url1), load_image(url2)
        >>> images = [[image1], [image2]]

        >>> text = [
        ...     "<image>In this image, we see",
        ...     "bla bla bla<image>",
        ... ]
        >>> outputs = processor(images=images, text=text, return_tensors="pt", padding=True)
        >>> input_ids = outputs.input_ids
        >>> input_tokens = processor.tokenizer.batch_decode(input_ids)
        >>> print(input_tokens)
        ['<s><fake_token_around_image><image><image><fake_token_around_image> In this image, we see', '<s> bla bla bla<fake_token_around_image><image><image><fake_token_around_image>']
        ```

        Args:
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. If is of type `List[ImageInput]`, it's assumed that this is for a single prompt i.e. of batch size 1.
            text (`Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]`, *optional*):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).

                Wherever an image token, `<image>` is encountered it is expanded to
                `<fake_token_around_image>` + `<image>` * `image_seq_len` * <fake_token_around_image>`.
            return_tensors (`Union[str, TensorType]`, *optional*):
                If set, will return tensors of a particular framework. See [`PreTrainedTokenizerFast.__call__`] for more
                information.

        Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr/   r%   r   zAInvalid input text. Please provide a string, or a list of strings   r3   zdInvalid input images. Please provide a single image or a list of images or a list of list of images.c                 ,    g | ]}t          |          S r+   )len.0samples     r   
<listcomp>z.Idefics2Processor.__call__.<locals>.<listcomp>   s    !C!C!C&#f++!C!C!Cr   z!The number of images in the text z and images  z should be the same.c                 &    g | ]}d  |D             S )c                 ,    g | ]}t          |          S r+   )r   )rY   ims     r   r[   z9Idefics2Processor.__call__.<locals>.<listcomp>.<listcomp>   s    777"z"~~777r   r+   rX   s     r   r[   z.Idefics2Processor.__call__.<locals>.<listcomp>   s'    MMMF77777MMMr   )rA   r   _merge_kwargsr.   r8   init_kwargspopr%   r   r   r   listrB   contentrC   r7   do_image_splittingrL   countreplaceupdater"   )rG   rP   rR   audiovideosrH   output_kwargsr%   n_images_in_textinputsrB   rC   	image_strprompt_stringsrZ   text_inputsn_images_in_imagesimage_inputss                     r   __call__zIdefics2Processor.__call__y   s>   l <FNJKKK8FF**#
 
"&."<
 
 

 &o6::?DQQ)6)BHZ$$$ fvd++ fJtAw4L4L f !deee  $4<*2K+\[=-H\JZ\\I#6 *%M	N . . ''[(A(ABBBY??+;(O=M(O(OTdQfgg%%f----($.XX=;WXXKMM+&&&$V,, !(FD)) 	.CF1I.N.N 	 vt,,"6!9d33 .fQil;;
 !z   "D!CF!C!C!C(:>N(N(N 8HWi  
 NMfMMMF/4/YY-:XYYLMM,'''r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r8   batch_decoderG   argsrH   s      r   rt   zIdefics2Processor.batch_decode   s    
 +t~*D;F;;;r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r8   decoderu   s      r   rx   zIdefics2Processor.decode   s    
 %t~$d5f555r   c                     | j         j        }| j        j        }t          t                              ||z                       S r    )r8   model_input_namesr7   rb   dictfromkeys)rG   tokenizer_input_namesimage_processor_input_namess      r   rz   z#Idefics2Processor.model_input_names  s:     $ @&*&:&L#DMM"7:U"UVVWWWr   )Nr<   N)NNNN)r&   r'   r(   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr)   r   rF   rQ   r   r	   r   r   r   r.   r   rr   rt   rx   propertyrz   __classcell__)rJ   s   @r   r6   r6   B   sg        & $[1J#_5L4%OR Rs R`c R R R R R R$
 
 
 OSbfy yj$z"2Dj9I4JJKy I2DOTJ]E^^_y 01y 
y y y yv< < <6 6 6 X X XX X X X Xr   r6   N)!r   typingr   r   r   r   feature_extraction_utilsr   image_utilsr	   r
   r   processing_utilsr   r   r   r   r   tokenization_utils_baser   r   utilsr   r   
get_loggerr&   loggerboolr   r"   r$   r.   r6   r+   r   r   <module>r      s    8 7 7 7 7 7 7 7 7 7 7 7 4 4 4 4 4 4 A A A A A A A A A A              = < < < < < < <        =<<<<<< 
	H	%	%;4 ; ; ; ;0 0 0! ! ! ! !<u ! ! ! !
 
 
 
 
.e 
 
 
 
DX DX DX DX DX DX DX DX DX DXr   