
    gDx              
          d Z ddlZddlZddlZddlmZmZmZmZ ddl	m
Z
 ddlmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZ eeeeef                  eeeeeef                  eeeeef                           eeeeeef                           f         Z G d
 ded          Z G d ded          Z G d ded          Z G d de          Zdeeeeef         dedeeef         fdZ dededefdZ!d Z"d Z#d Z$d dZ%dS )!zProcessor class for KOSMOS-2.    N)ListOptionalTupleUnion   )BatchFeature)
ImageInput
is_batched)ImagesKwargsProcessingKwargsProcessorMixin
TextKwargsUnpack)
AddedToken)BatchEncoding	TextInputc                   ^    e Zd ZU eee                  ed<   ee         ed<   ee         ed<   dS )Kosmos2ImagesKwargsbboxesnum_image_tokensfirst_image_token_idN)__name__
__module____qualname__r   r   float__annotations__int     j/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/kosmos2/processing_kosmos2.pyr   r   %   sG         T%[!!!!sm###"3-'''''r   r   F)totalc                   &    e Zd ZU ee         ed<   dS )Kosmos2TextKwargsadd_eos_tokenN)r   r   r   r   boolr   r   r   r    r#   r#   +   s"         D>!!!!!r   r#   c            
       F    e Zd ZU eed<   eed<   dddddddddd	ddid	Zd
S )Kosmos2ProcessorKwargstext_kwargsimages_kwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsverboser$   r   @   )r(   r)   N)r   r   r   r#   r   r   	_defaultsr   r   r    r'   r'   /   sg         """"&&&& #').*/&+%*"

 

 
 IIIr   r'   c                       e Zd ZdZddgZdgZdZdZd fd	Z	 	 	 	 dd
e	de
eee         f         dee         defdZd Zd Z	 	 	 d de
eee         f         d
e	dedee         de
eee         f         f
dZd Zd Zd!dZed             Zdede
eee                  eee                  f         defdZde
eeef         eeeeef         f         deeef         fdZ  xZ!S )"Kosmos2Processora,  
    Constructs an KOSMOS-2 processor which wraps a KOSMOS-2 image processor and a KOSMOS-2 tokenizer into a single
    processor.

    [`Kosmos2Processor`] offers all the functionalities of [`CLIPImageProcessor`] and some functionalities of
    [`XLMRobertaTokenizerFast`]. See the docstring of [`~Kosmos2Processor.__call__`] and [`~Kosmos2Processor.decode`]
    for more information.

    Args:
        image_processor (`CLIPImageProcessor`):
            An instance of [`CLIPImageProcessor`]. The image processor is a required input.
        tokenizer (`XLMRobertaTokenizerFast`):
            An instance of ['XLMRobertaTokenizerFast`]. The tokenizer is a required input.
        num_patch_index_tokens (`int`, *optional*, defaults to 1024):
            The number of tokens that represent patch indices.
    image_processor	tokenizernum_patch_index_tokensCLIPImageProcessorAutoTokenizer   c                 d   d|_         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d	| _        d
| _	        d| _
        d| _        | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        | j        g| _        || _        d t          | j                  D             }g }| j        |z   D ](}|                    t!          |ddd                     )|                    |           t%                                          ||           d S )NFz</doc>z<image>z</image>z</chunk>z</line>z<phrase>z	</phrase>z<object>z	</object></delimiter_of_multi_objects/>z<grounding>c                 Z    g | ](}d t          |                              d           d)S )<patch_index_   >)strzfill.0xs     r    
<listcomp>z-Kosmos2Processor.__init__.<locals>.<listcomp>   s4    mmmQ@c!ffll1oo@@@mmmr   T)lstriprstrip
normalized)r0   	eod_token	boi_token	eoi_token	eoc_token	eol_token	bop_token	eop_token	boo_token	eoo_token	dom_token	grd_token
tag_tokensr8   rangeappendr   
add_tokenssuper__init__)	selfr6   r7   r8   kwargspatch_index_tokenstokens_to_addtoken	__class__s	           r    r[   zKosmos2Processor.__init__[   s?   */	'!"##"#$#$9& NNNNNNNNNNN
 '=#mm%PTPkJlJlmmm_'99 	a 	aE  E$uY^!_!_!_````]+++)44444r   Nimagestextr]   returnc                 	    ||t          d            j        t          fd j        j        i|}|d                             dd          }|d                             dd          }|d                             dd          }	|d	                             d
d          }
|d	         d         }|d	         d         }|d	                             dd          }t                      }|)  j        |fi |d         }|	                    |           |ǉ 
                    ||||          }|rK|
sIt          |t                    r j        j         | }n#t          |t                    r fd|D             }|d	         d         o|
|d	         d<   ||nd|d	         d<   ||nd|d	         d<     j        d$d|i|d	         }|	                    |           ||d	         d<   ||d	         d<   ||d	         d<   |||	 j        j        dz   }	|}t!          |          dz   }t          t#          |	|	|z                       }dgdg|z  z   dgz   }g }g }|d         }t          |t                    r|g}|d         g|d<   |D ]}|d|         |z   |||z   d         z   }|                    |           t'          j        |          }|rdg|z   }|dgt)          |          t)          |          z
  z  z  }|                    |           t          |t                    r-t+          d t-          |j                  D             d           }|d         \  }}|d         \  }}|d	         d         o|
|d	         d<   d|d	         d<     j        d$d||         gi|d	         }t)          |j        d                   |k    r j        j        dk    r5 fd|D             }fd|D             }fd|d         D             |d<   nD j        j        dk    r4 fd|D             }fd |D             }fd!|d         D             |d<   t          |t                    r#|!|d         }|d         d         |d<   |d         }|	                    t3          ||d         |d"|#                     |S )%a	  
        This method uses [`CLIPImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`XLMRobertaTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.

        The rest of this documentation shows the arguments specific to `Kosmos2Processor`.

        Args:
            bboxes (`Union[List[Tuple[int]], List[Tuple[float]], List[List[Tuple[int]]], List[List[Tuple[float]]]]`, *optional*):
                The bounding bboxes associated to `texts`.
            num_image_tokens (`int`, *optional* defaults to 64):
                The number of (consecutive) places that are used to mark the placeholders to store image information.
                This should be the same as `latent_query_num` in the instance of `Kosmos2Config` you are using.
            first_image_token_id (`int`, *optional*):
                The token id that will be used for the first place of the subsequence that is reserved to store image
                information. If unset, will default to `self.tokenizer.unk_token_id + 1`.
            add_eos_token (`bool`, defaults to `False`):
                Whether or not to include `EOS` token id in the encoding when `add_special_tokens=True`.
        Nz*You have to specify either images or text.tokenizer_init_kwargsr)   r   r   r2   r   r(   r$   Fr*   r+   return_tensors)r   c                 0    g | ]}j         j         | S r   )r7   	bos_token)rE   sr\   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>   s)    KKKt~7<<<KKKr   rc      r   	input_idsattention_maskc                 6    g | ]\  }}|t          |          fS r   len)rE   idxrF   s      r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>   s&    TTTvsAc3q66]TTTr   c                     | d         S Nr   )rF   s    r    <lambda>z+Kosmos2Processor.__call__.<locals>.<lambda>   s    defhdi r   )keyrt   rightc                 V    g | ]%}|j         j        gt          |          z
  z  z   &S r   r7   pad_token_idrp   rE   rF   max_len_paddedr\   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>  s;    $v$v$vghQ$.*E)F.[^_`[a[aJa)b%b$v$v$vr   c                 B    g | ]}|d gt          |          z
  z  z   S r   ro   rE   rF   r|   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>  =     6 6 6DEA~A'> ??6 6 6r   c                 B    g | ]}|d gt          |          z
  z  z   S r~   ro   r   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>
  r   r   leftc                 V    g | ]%}j         j        gt          |          z
  z  |z   &S r   ry   r{   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>  s<    $v$v$vghdn&A%BnWZ[\W]W]F]%^ab%b$v$v$vr   c                 B    g | ]}d gt          |          z
  z  |z   S r~   ro   r   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>  =     6 6 6DEQC>CFF#:;a?6 6 6r   c                 B    g | ]}d gt          |          z
  z  |z   S r~   ro   r   s     r    rG   z-Kosmos2Processor.__call__.<locals>.<listcomp>  r   r   )rl   rm   image_embeds_position_mask)datatensor_typer   )
ValueError_merge_kwargsr'   r7   init_kwargspop
setdefaultr   r6   updatepreprocess_examples
isinstancerB   ri   listunk_token_idr   rW   rX   copyrp   sorted	enumeraterl   padding_sider   )r\   rb   rc   audiovideosr]   output_kwargsr   r   r   r$   r*   r+   rg   encodingimage_encodingtext_encodingwith_bosstart_indeximage_token_idsbase_image_embeds_position_maskrl   r   all_input_idstext_idsmasksorted_length_min_len_not_paddedrq   r|   s   `                             @r    __call__zKosmos2Processor.__call__   sx   8 >dlIJJJ**"
 
"&."<
 
 
 /33HdCC(9==>PRTUU,_=AABXZ^__%m488%PP*=9:NO.y9&}5@@AQSWXX>>1T1&[[M/<Z[[NOON+++++D&&Sc+ddD! L- LdC(( L"n6>>>DDd++ LKKKKdKKKDm,-ABT} -()=> BHggUZM-(3OU~^^cgM-()9:*DNUUUm8TUUMOOM***=Om$%9:29m$Y/9Gm$%56 2#+'+~'BQ'F$ *H h--!+K #5)=?SVf?f#g#ghhO/0cQC:J4J.JaS.P+ I)+&$[1M$$$ J!..67G.H-I)*) 8 8#L[L1OCh{]mOmOoOoFpp  ***y!@AA &3:Ds8}}s4yy899*11$7777$%%  &TT=;R1S1STTTZiZi! ! ! )6a(8%%&r*Q!-01EFX= m,-AB BFm,-=> . ` `T#YK `=Q^C_ ` `!$]%<Q%?!@!@%77~2g==$v$v$v$v$vlu$v$v$v	6 6 6 6Ic6 6 626 6 6 6IQRbIc6 6 6!122 4>>$v$v$v$v$vlu$v$v$v	6 6 6 6Ic6 6 626 6 6 6IQRbIc6 6 6!12
 $$$ K)?%aL	-56F-G-J)*-G-J* OO%.*23C*D6P 
 !/  	 	 	 r   c                    |dS t          |t                    st          d          |D ]}|t          |t                    s|g}|D ]~}t          |t                    rXt	          |          dk    rt          d |D                       s;t	          |          dk    rt          d |D                       st          d          dS )a  
        Check `bboxes` for a single text example. It could be
            - `None`: no bounding box associated to a text.
            - A list with each element being the bounding boxes associated to one `<phrase> ... </phrase>` pair found
              in a text. This could be:
                  - `None`: no bounding box associated to a `<phrase> ... </phrase>` pair.
                  - A tuple of 2 integers: A single bounding box specified by patch indices.
                  - A tuple of 4 float point number: A single bounding box specified by (normalized) coordinates.
                  - A list containing the above 2 tuple types: Multiple bounding boxes for a
                   `<phrase> ... </phrase>` pair.
        Nz@`bboxes` (for a single text example) should be `None` or a list.   c              3   @   K   | ]}t          |t                    V  d S N)r   r   rD   s     r    	<genexpr>zAKosmos2Processor._check_bboxes_for_single_text.<locals>.<genexpr>C  s,      .S.Saz!S/A/A.S.S.S.S.S.Sr   r@   c              3   @   K   | ]}t          |t                    V  d S r   )r   r   rD   s     r    r   zAKosmos2Processor._check_bboxes_for_single_text.<locals>.<genexpr>D  s,      1X1X1*Q2F2F1X1X1X1X1X1Xr   a'  Each element in `bboxes` (for a single text example) should be either `None`, a tuple containing 2 integers or 4 float point numbers, or a list containing such tuples. Also make sure the arguments `texts` and `bboxes` passed to `preprocess_text` are both in batches or both for a single example.)r   r   r   tuplerp   all)r\   r   bboxelements       r    _check_bboxes_for_single_textz.Kosmos2Processor._check_bboxes_for_single_text*  s    >FFD)) 	a_```  	 	D|d++ v 
 
!'511 	\\Q&&3.S.S7.S.S.S+S+S&G))c1X1XPW1X1X1X.X.X)$@  
	 	r   c                 l    |                                 }|| d| }|                     ||          }|S )N )strip_insert_patch_index_tokens)r\   rc   imager   img_info_tokenss        r    _preprocess_single_examplez+Kosmos2Processor._preprocess_single_exampleM  sD    zz||%....D ..tV<<r   r2   textsr   r   c                 d   	  j         g|z  }d                     j         g|z    j        gz             	d}t          |t                    rd}|g}|dgt          |          z  }nt          |          s|g}t          |          t          |          k    r0t          dt          |           dt          |           d          |s                     |           |g}nT|?t          |t                    st          d          |D ]}                     |           ndgt          |          z  }t          |          t          |          k    r0t          d	t          |           dt          |           d          	 fd
t          |||          D             }|s|d         }|S )a-  Add image and bounding box information to `texts` as image and patch index tokens.

        Args:
            texts (`Union[TextInput, List[TextInput]]`): The texts to be processed.
            images (`ImageInput`, *optional*): The images associated to `texts`.
            bboxes (`Union[List[Tuple[int]], List[Tuple[float]], List[List[Tuple[int]]], List[List[Tuple[float]]]]`, *optional*):
                The bounding bboxes associated to `texts`.
            num_image_tokens (`int`, *optional*, defaults to 64):
                The number of image tokens (used as latent queries). This should corresponds to the `latent_query_num`
                attribute in `Kosmos2Config`.

        Returns:
            `Union[TextInput, List[TextInput]]`: The processed texts with image and patch index tokens.
        r   TFNzGThe number of examples in `texts` and `images` should be the same. Got  v.s. 	 instead.zS`bboxes` should be `None` or a list (as a batch) when `texts` is passed as a batch.zGThe number of examples in `texts` and `bboxes` should be the same. Got c                 H    g | ]\  }}}                     |||          S r   )r   )rE   rc   r   r   r   r\   s       r    rG   z8Kosmos2Processor.preprocess_examples.<locals>.<listcomp>  sC     
 
 
!eT ++D%OO
 
 
r   r   )rL   joinrM   r   rB   rp   r
   r   r   r   zip)
r\   r   rb   r   r   
img_tokensbatchedrF   resultr   s
   `        @r    r   z$Kosmos2Processor.preprocess_examplesW  s9   , n%(88
((DN#3j#@DNCS#STT eS!! 	GGE>Vc%jj(FFF## 	XFu::V$$ CZ]^cZdZd  C  Clopvlwlw  C  C  C    		)..v666XFFfd++ x !vwww 6 622155556 Vc%jj(Fv;;#e**$$ CZ]^cZdZd  C  Clopvlwlw  C  C  C  
 
 
 
 
%(%?%?
 
 

  	AYFr   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r7   batch_decoder\   argsr]   s      r    r   zKosmos2Processor.batch_decode  s    
 +t~*D;F;;;r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r7   decoder   s      r    r   zKosmos2Processor.decode  s    
 %t~$d5f555r   Tc                 h    |                     | j                  d         }|rt          |          S |S rs   )splitrM   +clean_text_and_extract_entities_with_bboxes)r\   rc   cleanup_and_extractcaptions       r    post_process_generationz(Kosmos2Processor.post_process_generation  s6    **T^,,R0 	H>wGGGr   c                     | j         j        }| j        j        }t          t                              ||z                       S r   )r7   model_input_namesr6   r   dictfromkeys)r\   tokenizer_input_namesimage_processor_input_namess      r    r   z"Kosmos2Processor.model_input_names  s<     !% @&*&:&L#DMM"7:U"UVVWWWr   c                    |t          |          dk    r|S t          t          j        d|                    }t          |          t          |          k    r0t	          dt          |           dt          |           d          d}g }t          ||          D ]\  }}|                                \  }}	|                    |||	                    |	}|>t          |t                    r|g}g }
t          d |D                       st	          d          |D ]4}|                     |          \  }}|
                    | d	|            5t          |
          dk    rd
                    |
          }|                    d| d           |t          |          k     r|                    ||d                     d                    |          }|S )Nr   z<phrase>.+?</phrase>)stringzuThe number of elements in `bboxes` should be the same as the number of `<phrase> ... </phrase>` pairs in `text`. Got r   r   c              3      K   | ]}|d uV  	d S r   r   )rE   boxs     r    r   z>Kosmos2Processor._insert_patch_index_tokens.<locals>.<genexpr>  s&      773s$777777r   zTThe multiple bounding boxes for a single phrase should not contain any `None` value.r   z  </delimiter_of_multi_objects/> z	<object> z
 </object> )rp   r   refinditerr   r   spanrX   r   r   r   #_convert_bbox_to_patch_index_tokensr   )r\   rc   r   matched_phrasescurr_posbuffermatchedr   r   endpatch_index_stringsr   patch_index_1patch_index_2position_strs                  r    r   z+Kosmos2Processor._insert_patch_index_tokens  sS   >S[[A--Kr{+B4PPPQQ3v;;.. {  IL  M\  I]  I]  {  {  eh  io  ep  ep  {  {  {    &99 	@ 	@MGT\\^^FAsMM$x|,---H|$&& v"$77$77777  j    O O/3/W/WX[/\/\,}#**m+M+Mm+M+MNNNN&''1,,=BBCVWWLMM>l>>>????c$iiMM$xyy/***wwvr   r   c                 F   t          |          dk    r|\  }}n9t          t          j        | j                            }t          ||          \  }}dt          |                              d           d}dt          |                              d           d}||fS )Nr   r?   r@   rA   )rp   r   mathsqrtr8   coordinate_to_patch_indexrB   rC   )r\   r   idx_1idx_2num_patches_per_sidetoken_1token_2s          r    r   z4Kosmos2Processor._convert_bbox_to_patch_index_tokens  s     t99>>LE55 $'ty1L'M'M#N#N 4T;OPPLE58#e**"2"21"5"58888#e**"2"21"5"5888r   )r;   )NNNN)NNr2   )T)"r   r   r   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr[   r	   r   r   r   r   r'   r   r   r   r   	BboxInputr   r   rB   r   r   r   r   propertyr   r   r   r   r   __classcell__)ra   s   @r    r5   r5   D   sV        " $[1J,-L0%O+5 +5 +5 +5 +5 +5^ "26` `` ItI./` /0` 
` ` ` `D! ! !F   " *,@ @YY/0@ @ 	@
 #3-@ 
sDI~	@ @ @ @F< < <6 6 6    X X XX
+s +E$uSzBRTXY^_dYeTfBf<g +lo + + + +Z %S/5ue1K+LLM 	sCx               r   r5   r   r   rd   c                 2   | \  }}}}||k    r||k    st          d          t          j        ||z            }t          j        ||z            }t          j        ||z  dz
            }t          j        ||z  dz
            }	||z  |z   }
|	|z  |z   }|
|fS )a  Convert a bounding box to a pair of patch indices.

    Args:
        bbox (`Tuple[float, float, float, float]`):
            The 4 coordinates of the bounding box, with the format being (x1, y1, x2, y2) specifying the upper-left and
            lower-right corners of the box. It should have x2 > x1 and y2 > y1.
        num_patches_per_side (`int`): the number of patches along each side.

    Returns:
        `Tuple[int, int]`: A pair of patch indices representing the upper-left patch and lower-right patch.
    zTThe coordinates in `bbox` should be `(x1, y1, x2, y2)` with `x2 > x1` and `y2 > y1`.rk   )r   r   floorceil)r   r   x1y1x2y2ul_xul_ylr_xlr_yul_idxlr_idxs               r    r   r     s     RRGGRoppp:b//00D:b//00D9R..233D9R..233D((4/F((4/F6>r   r  r  c                 (   d|z  }| |z  }| |z  }||z  }||z  }| |k    r||z  }||z  }	||z  |z   }
||z  |z   }nS||k    s||k    r||z  }||z  }	||z  |z   }
||z  |z   }n,||z  |dz  z   }||z  |dz  z   }	||z  |dz  z   }
||z  |dz  z   }||	|
|fS )a  
    Given a grid of length `num_patches_per_side` and the indices of the upper-left and lower-right corners of a
    bounding box, returns the normalized coordinates of the bounding box, in the form (x1, y1, x2, y2).

    Args:
        ul_idx (`int`): the index of the grid cell that corresponds to the upper-left corner of the bounding box.
        lr_idx (`int`): the index of the grid cell that corresponds to the lower-right corner of the bounding box.
        num_patches_per_side (`int`): the number of patches along each side.

    Returns:
        `Tuple[float]`: the normalized coordinates of the bounding box, in the form (x1, y1, x2, y2).
    g      ?r   r   )r  r  r   	cell_sizer  r  r  r  r   r   r   r   s               r    patch_index_to_coordinater	    s    **I ((D))D((D))D III	)I	)	III	)I	)I	A-I	A-I	A-I	A-r2r>r   c           
         d}t          j        ||           }g }|D ]}|                    d          }|                                \  }}}|s8d}|                    d          d         |                    d          d         f}|                    d          }	g }
|	D ]}t          j        d|          }t          j        d|dd                   }|r|r|rX|
                    t          |                    d                    t          |                    d                    f           |
                    t          |                    d                    t          |                    d                    f           |r|                    |||
f           |
D ]0}d|d          d	|d          d
}|                    |||gf           1|S )a  Extract entities contained in `text`. The bounding bboxes is given in the form of patch indices.

    This functioin is only intended to be used within `clean_text_and_extract_entities_with_bboxes` where further
    processing happens, including converting to normalized coordinates and whitespace character cleaning up.

    Examples:

    ```python
    >>> text = "<grounding> An image of<phrase> a snowman</phrase><object><patch_index_0044><patch_index_0863></object> warming himself by<phrase> a fire</phrase><object><patch_index_0005><patch_index_0911></object>."
    >>> entities = extract_entities_with_patch_indices(text)
    >>> entities
    [(' a snowman', (31, 41), [(44, 863)]), (' a fire', (130, 137), [(5, 911)])]
    ```z(?:(<phrase>([^<]+)</phrase>))?<object>((?:<patch_index_\d+><patch_index_\d+></delimiter_of_multi_objects/>)*<patch_index_\d+><patch_index_\d+>)</object>r   Nr   r=   z<patch_index_(\d+)>rk   r?   z><patch_index_rA   )	r   r   r   groupsr   searchrX   r   group)rc   patternmatchesentities_with_patch_indicesmatchr   
phrase_tagphrasematch_contentpatch_index_pairsentity_bboxespairrF   yr   entitys                   r    #extract_entities_with_patch_indicesr  A  s    kG k'4((G #% K Kzz!}},1LLNN)
FM 	8FJJqMM!$ejjmmA&67D *//0PQQ% 		M 		MD	0$77A	0$qrr(;;A MQ M M!((#aggajj//3qwwqzz??)KLLLL!((#aggajj//3qwwqzz??)KLLL 	K'..m/LMMMM% K KJaJJQJJJ+22FD4&3IJJJJK
 '&r   c           	          | \  }\  }}t          t          j        dd|d|                             }t          t          j        dd|d|                             }|||ff}|S )zfAdjust the positions of the entities in `text` to be relative to the text with special fields removed.<.*?>r   N)rp   r   sub)r  rc   entity_namestartr   adjusted_startadjusted_endadjusted_entitys           r    adjust_entity_positionsr#  {  sk     &K%T&5&\::;;Nrvgr4:6677L"^\$BCOr   c                    |                                  }t          |           t          |                                           z
  }g }|D ]\  }\  }}}t          |          t          |                                          z
  }	t          |          t          |                                          z
  }
||z
  |	z   }||z
  |
z
  }|                                 }|                    |||f|f           ||fS )z9Remove the spaces around the text and the entities in it.)r   rp   rH   rI   rX   )rc   entitiesnew_textleading_spacesnew_entitiesr  r  r   r   entity_name_leading_spacesentity_name_trailing_spacess              r    _cleanup_spacesr+    s    zz||HYYT[[]]!3!33NL-5 A A)\eS6%(%5%5K<N<N<P<P8Q8Q%Q"&)+&6&6[=O=O=Q=Q9R9R&R#&)CCN"%@@!''))[5#,?@@@@\!!r       c                    t          j        dd|           }t          |           }g }|D ]K}|dd         |d         }}t          ||           }fd|D             }	|                    ||	fz              Lt          ||          S )a  Remove the tag tokens from `text`, extract entities in it with some cleaning up of white characters.

    Examples:

    ```python
    >>> text = "<grounding> An image of<phrase> a snowman</phrase><object><patch_index_0044><patch_index_0863></object> warming himself by<phrase> a fire</phrase><object><patch_index_0005><patch_index_0911></object>."
    >>> clean_text, entities = clean_text_and_extract_entities_with_bboxes(text)
    >>> clean_text
    'An image of a snowman warming himself by a fire.'

    >>> entities
    [('a snowman', (12, 21), [(0.390625, 0.046875, 0.984375, 0.828125)]), ('a fire', (41, 47), [(0.171875, 0.015625, 0.484375, 0.890625)])]
    ```r  r   r   r   c                 J    g | ]}t          |d          |d                    S )r   rk   )r	  )rE   r   r   s     r    rG   z?clean_text_and_extract_entities_with_bboxes.<locals>.<listcomp>  s0    qqqbf5d1gtAwH\]]qqqr   )r   r  r  r#  rX   r+  )
rc   r   processed_textr  r%  itemr  r   r"  bboxes_in_coordss
    `        r    r   r     s     VGR..N"Ed"K"KH+ ? ?acDG1&$??qqqqjpqqq+;*==>>>>>8444r   )r,  )&r   r   r   r   typingr   r   r   r   image_processing_utilsr   image_utilsr	   r
   processing_utilsr   r   r   r   r   tokenization_utilsr   tokenization_utils_baser   r   r   r   r   r   r#   r'   r5   r   r	  r  r#  r+  r   r   r   r    <module>r8     s   $ #   				 / / / / / / / / / / / / 2 2 2 2 2 2 1 1 1 1 1 1 1 1 b b b b b b b b b b b b b b , , , , , , ? ? ? ? ? ? ? ? sCxueUE)	*+eCHo	eE5%'(	)*,	( ( ( ( (,e ( ( ( (" " " " "
% " " " "    -U    *n  n  n  n  n ~ n  n  n bE%u*D$E ]` ejknpskset    >(c (3 (c ( ( ( (Z7' 7' 7't  " " "*5 5 5 5 5 5r   