
    g<                        d Z ddlmZmZmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZmZ ddlmZmZmZ ddlmZ  ej        e          Zd	Zd
  ed          D             d  ed          D             z   Z G d de          Z G d de          Z G d ded          Zde fdZ!d Z"d Z#d Z$deee                  fdZ% G d de          Z&dS )z 
Processor class for PaliGemma.
    )ListOptionalUnion   )BatchFeature)
ImageInputis_valid_image)ImagesKwargsProcessingKwargsProcessorMixin
TextKwargsUnpack!_validate_images_text_input_order)
AddedTokenPreTokenizedInput	TextInput)loggingz<image>c                     g | ]	}d |dd
S )z<locz0>4> .0is     n/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/paligemma/processing_paligemma.py
<listcomp>r   *   s"    555Aq555    i   c                     g | ]	}d |dd
S )z<segz0>3r   r   r   s     r   r   r   *   s"    8]8]8]Q8]8]8]r      c                   R    e Zd ZU eeeeee         ee         f                  ed<   dS )PaliGemmaTextKwargssuffixN)	__name__
__module____qualname__r   r   r   r   r   __annotations__r   r   r   r    r    -   s<         U9&7i$O`Jaabccccccr   r    c                   &    e Zd ZU ee         ed<   dS )PaliGemmaImagesKwargsdo_convert_rgbN)r"   r#   r$   r   boolr%   r   r   r   r'   r'   1   s"         TN"""""r   r'   c                   6    e Zd ZU eed<   eed<   ddiddidZdS )	PaliGemmaProcessorKwargstext_kwargsimages_kwargspaddingFdata_formatchannels_first)r,   r-   N)r"   r#   r$   r    r%   r'   	_defaultsr   r   r   r+   r+   5   sM         $$$$(((( u
 +
	 IIIr   r+   F)totalreturnc                 V    t          | t                    o|                     d          S )Nhttp)
isinstancestr
startswith)vals    r   is_urlr:   C   s#    c3:CNN6$:$::r   c                 >    t          |           pt          |           S N)r:   r	   elems    r   is_image_or_image_urlr?   H   s    $<</>$///r   c                 J    t          | t                    pt          |           S r<   )r6   r7   r?   r=   s    r   _is_str_or_imagerA   L   s     dS""A&;D&A&AAr   c                      ||z  |z   | |  dS )aZ  
    Builds a string from the input prompt and image tokens.
    For example, for the call:
    build_string_from_input(
        prompt="Prefix str"
        bos_token="<s>",
        image_seq_len=3,
        image_token="<im>",
    )
    The output will be:
    "<im><im><im><s>Initial str"
    Args:
        prompt (`List[Union[str, ImageInput]]`): The input prompt.
        bos_token (`str`): The beginning of sentence token.
        image_seq_len (`int`): The length of the image sequence.
        image_token (`str`): The image token.
        num_images (`int`): Number of images in the prompt.
    
r   prompt	bos_tokenimage_seq_lenimage_token
num_imagess        r   build_string_from_inputrJ   P   s'    & M)J6M	M6MMMMr   c                 z   t          | t          t          f          rIt          | d         t          t          f          r't          | d         d                   rd | D             S t          | t          t          f          rt          | d                   r| S t          |           r| gS t	          d|            )a  
    Accepts images in list or nested list format, and makes a list of images for preprocessing.

    Args:
        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
            The input image.

    Returns:
        list: A list of images.
    r   c                     g | ]	}|D ]}|
S r   r   )r   img_listimgs      r   r   z'make_batched_images.<locals>.<listcomp>s   s%    ???h??s????r   z"Could not make batched video from )r6   listtupler	   
ValueError)imagess    r   make_batched_imagesrS   g   s     &4-(( Zq	D%=-Q-Q VdeklmenopeqVrVr ??F????	FT5M	*	* ~fQi/H/H 			 x
B&BB
C
CCr   c            
            e Zd ZdZddgZdgZdZdZ	 	 	 d fd	Z	 	 	 	 dd	e	d
e
eeee         ee         f         dee         defdZd Zd Zed             Z xZS )PaliGemmaProcessora  
    Constructs a PaliGemma processor which wraps a PaliGemma image processor and a PaliGemma tokenizer into a single processor.

    [`PaliGemmaProcessor`] offers all the functionalities of [`SiglipImageProcessor`] and [`LlamaTokenizerFast`]. See the
    [`~PaliGemmaProcessor.__call__`] and [`~PaliGemmaProcessor.decode`] for more information.

    Args:
        image_processor ([`SiglipImageProcessor`], *optional*):
            The image processor is a required input.
        tokenizer ([`LlamaTokenizerFast`], *optional*):
            The tokenizer is a required input.
        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
            in a chat into a tokenizable string.
    image_processor	tokenizerchat_templateSiglipImageProcessor)GemmaTokenizerGemmaTokenizerFastNc                    |t          d          |t          d          t          |d          st          d          |j        | _        t          t          dd          }d|gi}|                    |           |                    t                     |                    t                    | _	        d|_
        d|_        t                                          |||	           d S )
Nz)You need to specify an `image_processor`.z"You need to specify a `tokenizer`.image_seq_lengthz;Image processor is missing an `image_seq_length` attribute.FT)
normalizedspecialadditional_special_tokens)rX   )rQ   hasattrr]   r   IMAGE_TOKENadd_special_tokens
add_tokensEXTRA_TOKENSconvert_tokens_to_idsimage_token_idadd_bos_tokenadd_eos_tokensuper__init__)selfrV   rW   rX   kwargsrH   tokens_to_add	__class__s          r   rk   zPaliGemmaProcessor.__init__   s     "HIIIABBB(:;; 	\Z[[[ / @ MMM4{mD$$]333\***'==kJJ"'	"'	)=QQQQQr   rR   textrm   r3   c                     t          ||          \  }}  j        t          fd j        j        i|}|d                             dd          }|dnd}|t          d          |t                              d           d	}t          |          r|g}n+t          |t                    rt          |d
                   r	 ||t          d |D                       st                              d           t          |t                    ret          |t                    rPt          |          t          |          k    r0t          dt          |           dt          |           d          t!          |          r|gg}nt          |t                    r"t!          |d
                   rd |D             }nZt          |t                    r6t          |d
         t                    rt!          |d
         d
                   st          d          |t          |          r|g}| fd|D             } fdt#          ||          D             }	t%          |          }n fd|D             }d |D             }	  j        |fi |d         d         }
|d                             dd          |d         dxx          j        z  cc<     j        |	f||d|d         }i |d|
i}|r=|d                             |d         d
k    d          }|                    d|i           t1          |          S )ah  
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
        and `kwargs` arguments to LlamaTokenizerFast's [`~LlamaTokenizerFast.__call__`] if `text` is not `None` to encode
        the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
        SiglipImageProcessor's [`~SiglipImageProcessor.__call__`] if `images` is not `None`. Please refer to the doctsring
        of the above two methods for more information.

        The usage for PaliGemma fine-tuning preparation is slightly different than usual. suffix passed are suffixes to
        the prompt in `text`, and will be placed after the prompt. This is because attention is handled differently for
        the prefix and the suffix. For instance,
        ```python
        image = PIL_cow_image
        prompt = "answer en Where is the cow standing?"
        suffix = "on the beach"
        inputs = processor(text=prompt, images=image, suffix=suffix)
        ```
        Here `inputs` will contain the `input_ids` and `token_type_ids` that follow
        ```python
        inputs["input_ids"][:, 256:]
        # tensor([[     2,   6006,    603,    573,  13910,   9980, 235336,    108,    477,   573,   8318]])
        inputs["token_type_ids"][:, 256:]
        tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]])
        ```
        Meaning the last three tokens are of "label" ("suffix") type while the other ones are of "prefix" type.


        Args:
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
                number of channels, H and W are image height and width.
            text (`str`, `List[str]`, `List[List[str]]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
                - `'jax'`: Return JAX `jnp.ndarray` objects.
            suffix (`str`, `List[str]`, `List[List[str]]`):
                The suffixes or batch of suffixes to be encoded. Only necessary for finetuning. See https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md
                for more information. If your prompt is "<image> What is on the image", the suffix corresponds to the expected prediction "a cow sitting on a bench".

        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:

            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `suffix`
              is provided, the `input_ids` will also contain the suffix input ids.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
              `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
            - **labels** -- Labels compatible with training if `suffix` is not None
        tokenizer_init_kwargsr,   r!   NTFzF`images` are expected as arguments to a `PaliGemmaProcessor` instance.z]You are using PaliGemma without a text prefix. It will perform as a picture-captioning model. r   c              3   (   K   | ]}t           |v V  d S r<   )rb   r   samples     r   	<genexpr>z.PaliGemmaProcessor.__call__.<locals>.<genexpr>  s'      @@{f,@@@@@@r   ai  You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.z	Received z images for zK prompts. Each prompt should be associated with an image or list of images.c                     g | ]}|gS r   r   )r   images     r   r   z/PaliGemmaProcessor.__call__.<locals>.<listcomp>  s    :::%ug:::r   zAimages must be an image, list of images or list of list of imagesc                 .    g | ]}|j         j        z   S r   )rW   	eos_token)r   sfxrl   s     r   r   z/PaliGemmaProcessor.__call__.<locals>.<listcomp>!  s#    OOOcDN$<<OOOr   c                     g | ]Q\  }}t          |j        j        j        t          t          |t                    rt          |          nd           RS )   rD   )rJ   rW   rF   r]   rb   r6   rO   len)r   rE   
image_listrl   s      r   r   z/PaliGemmaProcessor.__call__.<locals>.<listcomp>#  so     	! 	! 	! +
 ,%"&.":&*&;$/6@T6R6R#Y3z???XY  	! 	! 	!r   c                 `    g | ]*}|                     t          t          j        z            +S r   )replacerb   r]   )r   rv   rl   s     r   r   z/PaliGemmaProcessor.__call__.<locals>.<listcomp>/  s0    lll]c{K$BW4WXXlllr   c                     g | ]}| d S )rC   r   ru   s     r   r   z/PaliGemmaProcessor.__call__.<locals>.<listcomp>0  s     B B B6F B B Br   r-   pixel_values
max_length)	text_pairreturn_token_type_ids	input_idstoken_type_idsilabels)data)r   _merge_kwargsr+   rW   init_kwargspoprQ   loggerwarning_oncerA   r6   rO   anywarningr   r   r	   ziprS   rV   getr]   masked_fillupdater   )rl   rR   rp   audiovideosrm   output_kwargsr!   r   input_stringsr   inputsreturn_datar   s   `             r   __call__zPaliGemmaProcessor.__call__   s"   D 9FF**$
 
"&."<
 
 

 }-11(DAA(.(:>efff<o   DD!! 	6DDd## 	(8a(A(A 	 2@@4@@@@@ (C<   dD)) j.F.F 6{{c$ii//( XF  X  XT  X  X  X  
 "&)) j%hZFF-- j.2K2K j::6:::FF$VT22 jz&)T7R7R jWeflmnfopqfrWsWs j$%hiii%*:6*B*B%$XF%OOOOOOOF	! 	! 	! 	! /2$.?.?	! 	! 	! -V44llllgklll B BT B B B+t+FUUmO6TUUVde '++L$??K-(666$:OO666
"7
 
 M*	
 
 ?>>>  	3K(44V<L5MQR5RTXYYF&1222----r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )rW   batch_decoderl   argsrm   s      r   r   zPaliGemmaProcessor.batch_decodeG  s    
 +t~*D;F;;;r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )rW   decoder   s      r   r   zPaliGemmaProcessor.decodeO  s    
 %t~$d5f555r   c                     | j         j        }| j        j        }t          t                              ||z                       S r<   )rW   model_input_namesrV   rO   dictfromkeys)rl   tokenizer_input_namesimage_processor_input_namess      r   r   z$PaliGemmaProcessor.model_input_namesV  s<     !% @&*&:&L#DMM"7:U"UVVWWWr   )NNN)NNNN)r"   r#   r$   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classrk   r   r   r   r   r   r   r+   r   r   r   r   propertyr   __classcell__)ro   s   @r   rU   rU   ~   s-         $[1J#$L2>O 	R R R R R R8 "^bW. W.W. I0$y/4HYCZZ[W. 12W. 
W. W. W. W.t< < <6 6 6 X X XX X X X Xr   rU   N)'r   typingr   r   r   feature_extraction_utilsr   image_utilsr   r	   processing_utilsr
   r   r   r   r   r   tokenization_utils_baser   r   r   utilsr   
get_loggerr"   r   rb   rangere   r    r'   r+   r)   r:   r?   rA   rJ   rS   rU   r   r   r   <module>r      s    ) ( ( ( ( ( ( ( ( ( 4 4 4 4 4 4 5 5 5 5 5 5 5 5                        
       
	H	%	%55t5558]8]RWRWX[R\R\8]8]8]]d d d d d* d d d# # # # #L # # #
 
 
 
 
/u 
 
 
 
;4 ; ; ; ;
0 0 0B B BN N N.D4Z(8#9 D D D D.]X ]X ]X ]X ]X ]X ]X ]X ]X ]Xr   