
"""
Image/Text processor class for Fuyu.
"""

import re
from typing import Dict, List, Optional, Tuple, Union

import numpy as np

from ...image_utils import ImageInput
from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack, _validate_images_text_input_order
from ...tokenization_utils_base import PreTokenizedInput, TextInput
from ...utils import is_torch_available, logging, requires_backends


if is_torch_available():
    from .image_processing_fuyu import FuyuBatchFeature


logger = logging.get_logger(__name__)


if is_torch_available():
    import torch


TEXT_REPR_BBOX_OPEN = "<box>"
TEXT_REPR_BBOX_CLOSE = "</box>"
TEXT_REPR_POINT_OPEN = "<point>"
TEXT_REPR_POINT_CLOSE = "</point>"

TOKEN_BBOX_OPEN_STRING = "<0x00>"  # <bbox>
TOKEN_BBOX_CLOSE_STRING = "<0x01>"  # </bbox>
TOKEN_POINT_OPEN_STRING = "<0x02>"  # <point>
TOKEN_POINT_CLOSE_STRING = "<0x03>"  # </point>
BEGINNING_OF_ANSWER_STRING = "<0x04>"  # <boa>


class FuyuProcessorKwargs(ProcessingKwargs, total=False):
    _defaults = {
        "text_kwargs": {
            "add_special_tokens": True,
            "padding": False,
            "stride": 0,
            "return_attention_mask": True,
            "return_overflowing_tokens": False,
            "return_special_tokens_mask": False,
            "return_offsets_mapping": False,
            "return_token_type_ids": False,
            "return_length": False,
            "verbose": True,
        },
        "images_kwargs": {},
    }

def full_unpacked_stream_to_tensor(
    all_bi_tokens_to_place: List[int],
    full_unpacked_stream: List["torch.Tensor"],
    fill_value: int,
    batch_size: int,
    new_seq_len: int,
    offset: int,
) -> "torch.Tensor":
    """Takes an unpacked stream of tokens (i.e. a list of tensors, one for each item in the batch) and does
    the required padding to create a single tensor for the batch of shape batch_size x new_seq_len.
    """
    assert len(all_bi_tokens_to_place) == batch_size
    assert len(full_unpacked_stream) == batch_size

    # Create the padded batch tensor up front.
    new_padded_tensor = torch.full(
        [batch_size, new_seq_len],
        fill_value=fill_value,
        dtype=full_unpacked_stream[0].dtype,
        device=full_unpacked_stream[0].device,
    )

    # Place each batch entry into the batch tensor.
    for bi in range(batch_size):
        tokens_to_place = all_bi_tokens_to_place[bi]
        new_padded_tensor[bi, :tokens_to_place] = full_unpacked_stream[bi][offset : tokens_to_place + offset]

    return new_padded_tensor


def construct_full_unpacked_stream(
    num_real_text_tokens: Union[List[List[int]], "torch.Tensor"],
    input_stream: "torch.Tensor",
    image_tokens: List[List["torch.Tensor"]],
    batch_size: int,
    num_sub_sequences: int,
) -> List["torch.Tensor"]:
    """Takes an input_stream tensor of shape B x S x ?. For each subsequence, adds any required
    padding to account for images and then unpacks the subsequences to create a single sequence per item in the batch.
    Returns a list of tensors, one for each item in the batch."""

    all_bi_stream = []

    for batch_index in range(batch_size):
        all_si_stream = []

        # Construct the full token stream (including image placeholder tokens) for each subsequence and
        # append it to the list. Only the first subsequence is used since subsequences are not supported.
        image_adjustment = image_tokens[batch_index][0]
        subsequence_stream = torch.cat([image_adjustment, input_stream[batch_index, 0]], dim=0)
        num_real_tokens = image_adjustment.shape[0] + num_real_text_tokens[batch_index][0]
        all_si_stream.append(subsequence_stream[:num_real_tokens])
        all_bi_stream.append(torch.cat(all_si_stream, dim=0))

    return all_bi_stream


def _replace_string_repr_with_token_tags(prompt: str) -> str:
    prompt = prompt.replace(TEXT_REPR_POINT_OPEN, TOKEN_POINT_OPEN_STRING)
    prompt = prompt.replace(TEXT_REPR_POINT_CLOSE, TOKEN_POINT_CLOSE_STRING)
    prompt = prompt.replace(TEXT_REPR_BBOX_OPEN, TOKEN_BBOX_OPEN_STRING)
    prompt = prompt.replace(TEXT_REPR_BBOX_CLOSE, TOKEN_BBOX_CLOSE_STRING)
    return prompt


def _segment_prompt_into_text_token_conversions(prompt: str) -> List:
    """
    Given a string prompt, converts the prompt into a list of TextTokenConversions.
    """
    # Wherever we notice one of the open/close tag tokens, we split the prompt there.
    prompt_text_list: List = []
    regex_pattern = re.compile(
        f"({TOKEN_BBOX_OPEN_STRING}|{TOKEN_BBOX_CLOSE_STRING}|{TOKEN_POINT_OPEN_STRING}|{TOKEN_POINT_CLOSE_STRING})"
    )
    # Split by the regex pattern.
    prompt_split = regex_pattern.split(prompt)
    for i, elem in enumerate(prompt_split):
        if len(elem) == 0 or elem in [
            TOKEN_BBOX_OPEN_STRING,
            TOKEN_BBOX_CLOSE_STRING,
            TOKEN_POINT_OPEN_STRING,
            TOKEN_POINT_CLOSE_STRING,
        ]:
            continue
        prompt_text_list.append(
            (elem, i > 1 and prompt_split[i - 1] in [TOKEN_BBOX_OPEN_STRING, TOKEN_POINT_OPEN_STRING])
        )
    return prompt_text_list

def _transform_coordinates_and_tokenize(prompt: str, scale_factor: float, tokenizer) -> List[int]:
    """
    This function transforms the prompt in the following fashion:
    - <box> <point> and </box> </point> to their respective token mappings
    - extract the coordinates from the tag
    - transform the coordinates into the transformed image space
    - return the prompt tokens with the transformed coordinates and new tags

    Bounding boxes and points MUST be in the following format: <box>y1, x1, y2, x2</box> <point>x, y</point> The spaces
    and punctuation added above are NOT optional.
    """
    # First, replace the <box>/<point> text tags with their token counterparts.
    prompt = _replace_string_repr_with_token_tags(prompt)
    # Split the prompt into plain-text pieces and pieces that sit inside box/point tags.
    prompt_text_list = _segment_prompt_into_text_token_conversions(prompt)
    transformed_prompt_tokens: List[int] = []
    for elem in prompt_text_list:
        if elem[1]:
            # This piece holds coordinates: scale and tokenize it with the dedicated helper.
            within_tag_tokenized = _transform_within_tags(elem[0], scale_factor, tokenizer)
            transformed_prompt_tokens.extend(within_tag_tokenized)
        else:
            transformed_prompt_tokens.extend(tokenizer(elem[0], add_special_tokens=False).input_ids)
    return transformed_prompt_tokens

def _transform_within_tags(text: str, scale_factor: float, tokenizer) -> List[int]:
    """
    Given a bounding box of the fashion <box>1, 2, 3, 4</box> | <point>1, 2</point> This function is responsible for
    converting 1, 2, 3, 4 into tokens of 1 2 3 4 without any commas.
    """
    # Convert the text into a list of number strings.
    num_int_strs = text.split(",")
    if len(num_int_strs) == 2:
        token_space_open_string = tokenizer.vocab[TOKEN_POINT_OPEN_STRING]
        token_space_close_string = tokenizer.vocab[TOKEN_POINT_CLOSE_STRING]
    else:
        token_space_open_string = tokenizer.vocab[TOKEN_BBOX_OPEN_STRING]
        token_space_close_string = tokenizer.vocab[TOKEN_BBOX_CLOSE_STRING]

    # Strip whitespace and parse the numbers.
    num_ints = [float(num.strip()) for num in num_int_strs]
    # Scale to the transformed image size.
    if len(num_ints) == 2:
        num_ints_translated = scale_point_to_transformed_image(
            x=num_ints[0], y=num_ints[1], scale_factor=scale_factor
        )
    elif len(num_ints) == 4:
        num_ints_translated = scale_bbox_to_transformed_image(
            top=num_ints[0],
            left=num_ints[1],
            bottom=num_ints[2],
            right=num_ints[3],
            scale_factor=scale_factor,
        )
    else:
        raise ValueError(f"Invalid number of ints: {len(num_ints)}")
    # Look up the vocabulary entry of each scaled coordinate.
    tokens = [tokenizer.vocab[str(num)] for num in num_ints_translated]
    return [token_space_open_string] + tokens + [token_space_close_string]

def _tokenize_prompts_with_image_and_batch(
    tokenizer,
    prompts: List[List[str]],
    scale_factors: Optional[List[List["torch.Tensor"]]],
    max_tokens_to_generate: int,
    max_position_embeddings: int,
    add_BOS: bool,
    add_beginning_of_answer_token: bool,
) -> Tuple["torch.Tensor", "torch.Tensor"]:
    """
    Given a set of prompts and number of tokens to generate:
    - tokenize prompts
    - set the sequence length to be the max of length of prompts plus the number of tokens we would like to generate
    - pad all the sequences to this length so we can convert them into a 3D tensor.
    """
    # If scale factors are provided, transform the coordinates while tokenizing.
    if scale_factors is not None:
        transformed_prompt_tokens = []
        for prompt_seq, scale_factor_seq in zip(prompts, scale_factors):
            transformed_prompt_tokens.append(
                [
                    _transform_coordinates_and_tokenize(prompt, scale_factor.item(), tokenizer)
                    for prompt, scale_factor in zip(prompt_seq, scale_factor_seq)
                ]
            )
    else:
        transformed_prompt_tokens = [[tokenizer.tokenize(prompt) for prompt in prompt_seq] for prompt_seq in prompts]

    prompts_tokens = transformed_prompt_tokens

    if add_BOS:
        bos_token = tokenizer.vocab["<s>"]
    else:
        bos_token = tokenizer.vocab["|ENDOFTEXT|"]
    prompts_tokens = [[[bos_token] + x for x in prompt_seq] for prompt_seq in prompts_tokens]
    if add_beginning_of_answer_token:
        boa = tokenizer.vocab[BEGINNING_OF_ANSWER_STRING]
        # Only add the beginning-of-answer token to the last subsequence since that is what will be completed.
        for token_seq in prompts_tokens:
            token_seq[-1].append(boa)

    # Each sequence now has a different length. Extend them so that they
    # incorporate the tokens that need to be generated and are all equal length.
    prompts_length = [[len(x) for x in prompts_tokens_seq] for prompts_tokens_seq in prompts_tokens]
    # Get the max prompt length.
    max_prompt_len: int = np.max(prompts_length)
    # Number of tokens in each sample of the batch.
    samples_length = min(max_prompt_len + max_tokens_to_generate, max_position_embeddings)
    if max_prompt_len + max_tokens_to_generate > max_position_embeddings:
        logger.warning(
            f"Max subsequence prompt length of {max_prompt_len} + max tokens to generate {max_tokens_to_generate} "
            f"exceeds context length of {max_position_embeddings}. Will generate as many tokens as possible."
        )
    # Pad every subsequence to samples_length.
    for prompt_tokens_seq, prompts_length_seq in zip(prompts_tokens, prompts_length):
        for prompt_tokens, prompt_length in zip(prompt_tokens_seq, prompts_length_seq):
            if len(prompt_tokens) > samples_length:
                raise ValueError("Length of subsequence prompt exceeds sequence length.")
            padding_size = samples_length - prompt_length
            prompt_tokens.extend([tokenizer.vocab["|ENDOFTEXT|"]] * padding_size)

    # Now that everything is rectangular, convert to tensors.
    prompts_tokens_tensor = torch.tensor(prompts_tokens, dtype=torch.int64)
    prompts_length_tensor = torch.tensor(prompts_length, dtype=torch.int64)

    return prompts_tokens_tensor, prompts_length_tensor


# Simplified assuming no crop/padding offsets are applied to the transformed image.
def original_to_transformed_h_coords(original_coords, scale_h):
    return np.round(original_coords * scale_h).astype(np.int32)


def original_to_transformed_w_coords(original_coords, scale_w):
    return np.round(original_coords * scale_w).astype(np.int32)


def scale_point_to_transformed_image(x: float, y: float, scale_factor: float) -> List[int]:
    x_scaled = original_to_transformed_w_coords(np.array([x / 2]), scale_factor)[0]
    y_scaled = original_to_transformed_h_coords(np.array([y / 2]), scale_factor)[0]
    return [x_scaled, y_scaled]


def scale_bbox_to_transformed_image(
    top: float, left: float, bottom: float, right: float, scale_factor: float
) -> List[int]:
    top_scaled = original_to_transformed_w_coords(np.array([top / 2]), scale_factor)[0]
    left_scaled = original_to_transformed_h_coords(np.array([left / 2]), scale_factor)[0]
    bottom_scaled = original_to_transformed_w_coords(np.array([bottom / 2]), scale_factor)[0]
    right_scaled = original_to_transformed_h_coords(np.array([right / 2]), scale_factor)[0]
    return [top_scaled, left_scaled, bottom_scaled, right_scaled]

class FuyuProcessor(ProcessorMixin):
    r"""
    Constructs a Fuyu processor which wraps a Fuyu image processor and a Llama tokenizer into a single processor.

    [`FuyuProcessor`] offers all the functionalities of [`FuyuImageProcessor`] and [`LlamaTokenizerFast`]. See the
    [`~FuyuProcessor.__call__`] and [`~FuyuProcessor.decode`] for more information.

    Args:
        image_processor ([`FuyuImageProcessor`]):
            The image processor is a required input.
        tokenizer ([`LlamaTokenizerFast`]):
            The tokenizer is a required input.
    """
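
    # Illustrative usage sketch (the "adept/fuyu-8b" checkpoint name, the local file
    # name and the prompt below are assumptions for the example only):
    #
    #     from PIL import Image
    #     from transformers import FuyuProcessor
    #
    #     processor = FuyuProcessor.from_pretrained("adept/fuyu-8b")
    #     image = Image.open("bus.png")
    #     inputs = processor(images=image, text="Generate a coco-style caption.\n", return_tensors="pt")
    #     # `inputs` holds input_ids, image_patches, image_patches_indices and attention_mask.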

    attributes = ["image_processor", "tokenizer"]
    valid_kwargs = []
    image_processor_class = "FuyuImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __init__(self, image_processor, tokenizer, **kwargs):
        super().__init__(image_processor=image_processor, tokenizer=tokenizer)
        self.image_processor = image_processor
        self.tokenizer = tokenizer
        self.max_tokens_to_generate = 10
        self.max_position_embeddings = 16384  # TODO Can't derive this from model files: where to set it?
        self.pad_token_id = 0
        self.dummy_image_index = -1

    def _left_pad_inputs_with_attention_mask(self, model_inputs: List[Dict], return_attention_mask: bool):
        max_length_input_ids = max(entry["input_ids"].shape[1] for entry in model_inputs)
        max_length_image_patch_indices = max(entry["image_patches_indices"].shape[1] for entry in model_inputs)

        batched_inputs = {"input_ids": [], "image_patches": [], "image_patches_indices": [], "attention_mask": []}

        for entry in model_inputs:
            for key, tensor in entry.items():
                if key == "input_ids":
                    num_padding_tokens = max_length_input_ids - tensor.shape[1]
                    padded_input_ids = torch.cat(
                        [
                            torch.full((tensor.shape[0], num_padding_tokens), self.pad_token_id, dtype=torch.long),
                            tensor,
                        ],
                        dim=1,
                    )
                    batched_inputs[key].append(padded_input_ids)

                    attention_mask = torch.cat(
                        [torch.zeros(tensor.shape[0], num_padding_tokens, dtype=torch.long), torch.ones_like(tensor)],
                        dim=1,
                    )
                    batched_inputs["attention_mask"].append(attention_mask)

                elif key == "image_patches":
                    # Image patches are kept as a list rather than padded into a single tensor.
                    batched_inputs[key].append(tensor)

                else:  # image_patches_indices
                    num_padding_indices = max_length_image_patch_indices - tensor.shape[1]
                    padded_indices = torch.cat(
                        [
                            torch.full(
                                (tensor.shape[0], num_padding_indices), self.dummy_image_index, dtype=torch.long
                            ),
                            tensor,
                        ],
                        dim=1,
                    )
                    batched_inputs[key].append(padded_indices)
        batched_keys = ["input_ids", "image_patches_indices"]
        if return_attention_mask:
            batched_keys.append("attention_mask")
        for key in batched_keys:
            batched_inputs[key] = torch.cat(batched_inputs[key], dim=0)

        return batched_inputs

    def get_sample_encoding(
        self,
        prompts,
        scale_factors,
        image_unpadded_heights,
        image_unpadded_widths,
        image_placeholder_id,
        image_newline_id,
        tensor_batch_images,
    ):
        image_present = torch.ones(1, 1, 1)
        model_image_input = self.image_processor.preprocess_with_tokenizer_info(
            image_input=tensor_batch_images,
            image_present=image_present,
            image_unpadded_h=image_unpadded_heights,
            image_unpadded_w=image_unpadded_widths,
            image_placeholder_id=image_placeholder_id,
            image_newline_id=image_newline_id,
            variable_sized=True,
        )
        # FIXME max_tokens_to_generate is embedded into this processor's call.
        prompt_tokens, prompts_length = _tokenize_prompts_with_image_and_batch(
            tokenizer=self.tokenizer,
            prompts=prompts,
            scale_factors=scale_factors,
            max_tokens_to_generate=self.max_tokens_to_generate,
            max_position_embeddings=self.max_position_embeddings,
            add_BOS=True,
            add_beginning_of_answer_token=True,
        )
        image_padded_unpacked_tokens = construct_full_unpacked_stream(
            num_real_text_tokens=prompts_length,
            input_stream=prompt_tokens,
            image_tokens=model_image_input["image_input_ids"],
            batch_size=1,
            num_sub_sequences=self.subsequence_length,
        )
        # Construct inputs for image patch indices.
        unpacked_image_patch_indices_per_batch = construct_full_unpacked_stream(
            num_real_text_tokens=prompts_length,
            input_stream=torch.full_like(prompt_tokens, -1),
            image_tokens=model_image_input["image_patch_indices_per_batch"],
            batch_size=1,
            num_sub_sequences=self.subsequence_length,
        )
        max_prompt_length = max(x.shape[-1] for x in image_padded_unpacked_tokens)
        max_seq_len_batch = min(max_prompt_length + self.max_tokens_to_generate, self.max_position_embeddings)
        tokens_to_place = min(max_seq_len_batch, max(0, image_padded_unpacked_tokens[0].shape[0]))

        # Use the same packing logic for the image patch indices.
        image_patch_input_indices = full_unpacked_stream_to_tensor(
            all_bi_tokens_to_place=[tokens_to_place],
            full_unpacked_stream=unpacked_image_patch_indices_per_batch,
            fill_value=-1,
            batch_size=1,
            new_seq_len=max_seq_len_batch,
            offset=0,
        )
        image_patches_tensor = torch.stack([img[0] for img in model_image_input["image_patches"]])
        batch_encoding = {
            "input_ids": image_padded_unpacked_tokens[0].unsqueeze(0),
            "image_patches": image_patches_tensor,
            "image_patches_indices": image_patch_input_indices,
        }
        return batch_encoding

    def __call__(
        self,
        images: ImageInput = None,
        text: Optional[Union[str, List[str], TextInput, PreTokenizedInput]] = None,
        audio=None,
        videos=None,
        **kwargs: Unpack[FuyuProcessorKwargs],
    ) -> "FuyuBatchFeature":
        """
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
        and `kwargs` arguments to LlamaTokenizerFast's [`~LlamaTokenizerFast.__call__`] if `text` is not `None` to
        encode the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
        FuyuImageProcessor's [`~FuyuImageProcessor.__call__`] if `images` is not `None`. Please refer to the docstring
        of the above two methods for more information.

        Args:
            images (`PIL.Image.Image`, `List[PIL.Image.Image]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`str`, `List[str]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).

        Returns:
            [`FuyuBatchEncoding`]: A [`FuyuBatchEncoding`] with the following fields:

            - **input_ids** -- Tensor of token ids to be fed to a model. Returned when `text` is not `None`.
            - **image_patches** -- List of Tensor of image patches. Returned when `images` is not `None`.
            - **image_patches_indices** -- Tensor of indices where patch embeddings have to be inserted by the model.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model when
              `return_attention_mask=True`.
        """
        requires_backends(self, ["torch"])

        # --- Check input validity ---
        if text is None and images is None:
            raise ValueError("You have to specify either text or images. Both cannot be None.")
        # Check if images and text inputs are reversed for backward compatibility.
        images, text = _validate_images_text_input_order(images, text)

        output_kwargs = self._merge_kwargs(
            FuyuProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )

        if not output_kwargs["text_kwargs"].setdefault("return_attention_mask", True):
            raise ValueError("`return_attention_mask=False` is not supported for this model.")

        if text is not None and images is None:
            logger.warning("You are processing a text with no associated image. Make sure it is intended.")
            self.current_processor = self.tokenizer
            text_encoding = self.tokenizer(text, **output_kwargs["text_kwargs"])
            return text_encoding

        if text is None and images is not None:
            logger.warning("You are processing an image with no associated text. Make sure it is intended.")
            prompts = [[""]]
        if text is not None and images is not None:
            if isinstance(text, str):
                prompts = [[text]]
            elif isinstance(text, list):
                prompts = [[text_seq] for text_seq in text]

        # --- Preprocess images using self.image_processor ---

        # FIXME - We hard code "pt" here because the rest of the processing assumes torch tensors.
        output_kwargs["images_kwargs"]["return_tensors"] = "pt"
        image_encoding = self.image_processor.preprocess(images, **output_kwargs["images_kwargs"])
        batch_images = image_encoding["images"]
        image_unpadded_heights = image_encoding["image_unpadded_heights"]
        image_unpadded_widths = image_encoding["image_unpadded_widths"]
        scale_factors = image_encoding["image_scale_factors"]

        self.subsequence_length = 1  # Each batch contains only one sequence.
        self.batch_size = len(batch_images)

        # --- Use self.tokenizer to get the ids of special tokens to insert into image ids ---

        image_placeholder_id = self.tokenizer("|SPEAKER|", add_special_tokens=False)["input_ids"][1]
        image_newline_id = self.tokenizer("|NEWLINE|", add_special_tokens=False)["input_ids"][1]
        tensor_batch_images = torch.stack([img[0] for img in batch_images]).unsqueeze(1)

        # --- Use self.image_processor again to obtain the full token ids and batch inputs ---
        all_encodings = []

        for prompt, scale_factor, image_unpadded_height, image_unpadded_width, tensor_batch_image in zip(
            prompts, scale_factors, image_unpadded_heights, image_unpadded_widths, tensor_batch_images
        ):
            sample_encoding = self.get_sample_encoding(
                prompts=[prompt],
                scale_factors=[scale_factor],
                image_unpadded_heights=torch.tensor([image_unpadded_height]),
                image_unpadded_widths=torch.tensor([image_unpadded_width]),
                image_placeholder_id=image_placeholder_id,
                image_newline_id=image_newline_id,
                tensor_batch_images=tensor_batch_image.unsqueeze(0),
            )
            all_encodings.append(sample_encoding)
        batch_encoding = self._left_pad_inputs_with_attention_mask(
            model_inputs=all_encodings, return_attention_mask=True
        )
        return FuyuBatchFeature(data=batch_encoding)

    def post_process_box_coordinates(self, outputs, target_sizes=None):
        """
        Transforms raw coordinates detected by [`FuyuForCausalLM`] to the original images' coordinate space.
        Coordinates will be returned in "box" format, with the following pattern:
            `<box>top, left, bottom, right</box>`

        Point coordinates are not supported yet.

        Args:
            outputs ([`GenerateOutput`]):
                Raw outputs from `generate`.
            target_sizes (`torch.Tensor`, *optional*):
                Tensor of shape (batch_size, 2) where each entry is the (height, width) of the corresponding image in
                the batch. If set, found coordinates in the output sequence are rescaled to the target sizes. If left
                to None, coordinates will not be rescaled.

        Returns:
            `GenerateOutput`: Same output type returned by `generate`, with output token ids replaced with
                boxed and possible rescaled coordinates.
        """

        def scale_factor_to_fit(original_size, target_size=None):
            height, width = original_size
            if target_size is None:
                max_height = self.image_processor.size["height"]
                max_width = self.image_processor.size["width"]
            else:
                max_height, max_width = target_size
            if width <= max_width and height <= max_height:
                return 1.0
            return min(max_height / height, max_width / width)

        def find_delimiters_pair(tokens, start_token, end_token):
            start_id = self.tokenizer.convert_tokens_to_ids(start_token)
            end_id = self.tokenizer.convert_tokens_to_ids(end_token)

            starting_positions = (tokens == start_id).nonzero(as_tuple=True)[0]
            ending_positions = (tokens == end_id).nonzero(as_tuple=True)[0]

            if torch.any(starting_positions) and torch.any(ending_positions):
                return (starting_positions[0], ending_positions[0])
            return (None, None)

        def tokens_to_boxes(tokens, original_size):
            while (pair := find_delimiters_pair(tokens, TOKEN_BBOX_OPEN_STRING, TOKEN_BBOX_CLOSE_STRING)) != (
                None,
                None,
            ):
                start, end = pair
                if end != start + 5:
                    continue

                # Retrieve the transformed coordinates from the tokens.
                coords = self.tokenizer.convert_ids_to_tokens(tokens[start + 1 : end])

                # Scale back to the original image size and multiply by 2.
                scale = scale_factor_to_fit(original_size)
                top, left, bottom, right = [2 * int(float(c) / scale) for c in coords]

                # Replace the IDs so they get detokenized correctly.
                replacement = f" {TEXT_REPR_BBOX_OPEN}{top}, {left}, {bottom}, {right}{TEXT_REPR_BBOX_CLOSE}"
                replacement = self.tokenizer.tokenize(replacement)[1:]
                replacement = self.tokenizer.convert_tokens_to_ids(replacement)
                replacement = torch.tensor(replacement).to(tokens)

                tokens = torch.cat([tokens[:start], replacement, tokens[end + 1 :]], 0)
            return tokens

        def tokens_to_points(tokens, original_size):
            while (pair := find_delimiters_pair(tokens, TOKEN_POINT_OPEN_STRING, TOKEN_POINT_CLOSE_STRING)) != (
                None,
                None,
            ):
                start, end = pair
                if end != start + 3:
                    continue

                # Retrieve the transformed coordinates from the tokens.
                coords = self.tokenizer.convert_ids_to_tokens(tokens[start + 1 : end])

                # Scale back to the original image size and multiply by 2.
                scale = scale_factor_to_fit(original_size)
                x, y = [2 * int(float(c) / scale) for c in coords]

                # Replace the IDs so they get detokenized correctly.
                replacement = f" {TEXT_REPR_POINT_OPEN}{x}, {y}{TEXT_REPR_POINT_CLOSE}"
                replacement = self.tokenizer.tokenize(replacement)[1:]
                replacement = self.tokenizer.convert_tokens_to_ids(replacement)
                replacement = torch.tensor(replacement).to(tokens)

                tokens = torch.cat([tokens[:start], replacement, tokens[end + 1 :]], 0)
            return tokens

        if target_sizes is None:
            target_sizes = ((self.image_processor.size["height"], self.image_processor.size["width"]),) * len(outputs)
        elif target_sizes.shape[1] != 2:
            raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch")

        if len(outputs) != len(target_sizes):
            raise ValueError("Make sure that you pass in as many target sizes as output sequences")

        results = []
        for seq, size in zip(outputs, target_sizes):
            seq = tokens_to_boxes(seq, size)
            seq = tokens_to_points(seq, size)
            results.append(seq)

        return results

    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)