
    gI)                         d Z ddlmZmZmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZmZ ddlmZmZ  ej        e          Z G d	 d
ed          Z G d ded          Z G d de          ZdS )z
Processor class for UDOP.
    )ListOptionalUnion)logging   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixin
TextKwargsUnpack)PreTokenizedInput	TextInputc                       e Zd ZU eeee         eee                  f                  ed<   eeee                  eeee                           f         ed<   dS )UdopTextKwargsword_labelsboxesN)__name__
__module____qualname__r   r   r   int__annotations__     d/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/udop/processing_udop.pyr   r       sd         %S	4S	? :;<<<<d3i$tDI"77888888r   r   F)totalc            
       8    e Zd ZU eed<   dddddddddd	i dZdS )UdopProcessorKwargstext_kwargsTFr   )	add_special_tokenspadding
truncationstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbose)r   images_kwargsN)r   r   r   r   r   	_defaultsr   r   r   r   r   %   sT          #').*/&+"

 

  IIIr   r   c                        e Zd ZdZddgZdZdZdgZ fdZ	 	 dddd	d
e	e
         deeeee         ee         f         dee         defdZd Zd Zd Zd Zed             Z xZS )UdopProcessora  
    Constructs a UDOP processor which combines a LayoutLMv3 image processor and a UDOP tokenizer into a single processor.

    [`UdopProcessor`] offers all the functionalities you need to prepare data for the model.

    It first uses [`LayoutLMv3ImageProcessor`] to resize, rescale and normalize document images, and optionally applies OCR
    to get words and normalized bounding boxes. These are then provided to [`UdopTokenizer`] or [`UdopTokenizerFast`],
    which turns the words and bounding boxes into token-level `input_ids`, `attention_mask`, `token_type_ids`, `bbox`.
    Optionally, one can provide integer `word_labels`, which are turned into token-level `labels` for token
    classification tasks (such as FUNSD, CORD).

    Additionally, it also supports passing `text_target` and `text_pair_target` to the tokenizer, which can be used to
    prepare labels for language modeling tasks.

    Args:
        image_processor (`LayoutLMv3ImageProcessor`):
            An instance of [`LayoutLMv3ImageProcessor`]. The image processor is a required input.
        tokenizer (`UdopTokenizer` or `UdopTokenizerFast`):
            An instance of [`UdopTokenizer`] or [`UdopTokenizerFast`]. The tokenizer is a required input.
    image_processor	tokenizerLayoutLMv3ImageProcessor)UdopTokenizerUdopTokenizerFast	text_pairc                 L    t                                          ||           d S )N)super__init__)selfr-   r.   	__class__s      r   r5   zUdopProcessor.__init__S   s#    )44444r   N)audiovideosimagestextkwargsreturnc                    | j         t          fd| j        j        i| | j        | }|d                             dd          }|d                             dd          }	|d                             dd          }
|d                             dd          }|d                             d	d          }|d                             d
d          }| j        j        r|t          d          | j        j        r|	t          d          |r|st          d          | | j        di |d         S  | j        dd|i|d         }|                    dd          }|                    dd          }|d                             d
d           |d                             dd           |
|d         d<   ||n||d         d<   |	|d         d<   |1| j        j        r%|
#t          |t                    r|g}||d         d<    | j        dd||n|i|d         }|du r%|                     |d         |d                   |d<   |                    |           |S )a~  
        This method first forwards the `images` argument to [`~UdopImageProcessor.__call__`]. In case
        [`UdopImageProcessor`] was initialized with `apply_ocr` set to `True`, it passes the obtained words and
        bounding boxes along with the additional arguments to [`~UdopTokenizer.__call__`] and returns the output,
        together with the prepared `pixel_values`. In case [`UdopImageProcessor`] was initialized with `apply_ocr` set
        to `False`, it passes the words (`text`/``text_pair`) and `boxes` specified by the user along with the
        additional arguments to [`~UdopTokenizer.__call__`] and returns the output, together with the prepared
        `pixel_values`.

        Alternatively, one can pass `text_target` and `text_pair_target` to prepare the targets of UDOP.

        Please refer to the docstring of the above two methods for more information.
        tokenizer_init_kwargsr   r   Nr   r2   r$   Fr&   text_targetzdYou cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.zaYou cannot provide word labels if you initialized the image processor with apply_ocr set to True.zKYou cannot return overflowing tokens without returning the offsets mapping.r:   r)   wordstext_pair_targetr;   Tpixel_valuesoverflow_to_sample_mappingr   )_merge_kwargsr   r.   init_kwargs'prepare_and_validate_optional_call_argspopgetr-   	apply_ocr
ValueError
isinstancestrget_overflowing_imagesupdate)r6   r:   r;   r8   r9   argsr<   output_kwargsr   r   r2   r$   r&   r@   featuresfeatures_wordsfeatures_boxesencoded_inputss                     r   __call__zUdopProcessor.__call__V   s   8 +*
 
"&."<
 
 ;d:DA	
 
 m,00$??#M266}dKK!-044[$GG	$1-$@$D$DE`bg$h$h!!.}!=!A!ABZ\a!b!b#M266}dKK) 	u/@v   ) 	{/Fs   % 	l-C 	ljkkk"!4>  .   ,t+\\6\]?=[\\H%\\'488N%\\'488N-(,,]DAAA-(,,-?FFF8AM-(5=B=NEETbM-(1:EM-(7 D$8$ByGXdC(( " 6D<Jm,[9+T^  !-TT>. N )D00+/+F+F^,n=Y.Z, ,( OON+++Or   c                     g }|D ]}|                     ||                    t          |          t          |          k    r/t          dt          |           dt          |                     |S )Nz`Expected length of images to be the same as the length of `overflow_to_sample_mapping`, but got z and )appendlenrK   )r6   r:   rD   images_with_overflow
sample_idxs        r   rN   z$UdopProcessor.get_overflowing_images   s    !4 	< 	<J ''z(:;;;;#$$,F(G(GGGV,--V V478R4S4SV V  
 $#r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        r.   batch_decoder6   rP   r<   s      r   r^   zUdopProcessor.batch_decode   s    
 +t~*D;F;;;r   c                 &     | j         j        |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
        to the docstring of this method for more information.
        )r.   decoder_   s      r   ra   zUdopProcessor.decode   s    
 %t~$d5f555r   c                 :    | j                             |d          S )a  
        Post-process the output of the model to decode the text.

        Args:
            generated_outputs (`torch.Tensor` or `np.ndarray`):
                The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
                or `(sequence_length,)`.

        Returns:
            `List[str]`: The decoded text.
        T)skip_special_tokensr]   )r6   generated_outputss     r   post_process_image_text_to_textz-UdopProcessor.post_process_image_text_to_text   s!     ~**+<RV*WWWr   c                 
    g dS )N)rC   	input_idsbboxattention_maskr   )r6   s    r   model_input_nameszUdopProcessor.model_input_names   s    FFFFr   )NN)r   r   r   __doc__
attributesimage_processor_classtokenizer_classoptional_call_argsr5   r   r	   r   r   r   r   r   r   r   rV   rN   r^   ra   re   propertyrj   __classcell__)r7   s   @r   r,   r,   7   sK        * $[1J6<O%5 5 5 5 5
 (,^b\ \ \ \$\ I0$y/4HYCZZ[\ ,-\ 
\ \ \ \~$ $ $< < <6 6 6X X X G G XG G G G Gr   r,   N)rk   typingr   r   r   transformersr   image_processing_utilsr   image_utilsr	   processing_utilsr
   r   r   r   tokenization_utils_baser   r   
get_loggerr   loggerr   r   r,   r   r   r   <module>rz      s`    ) ( ( ( ( ( ( ( ( (             2 2 2 2 2 2 % % % % % % T T T T T T T T T T T T C C C C C C C C 
	H	%	%9 9 9 9 9Zu 9 9 9 9
    *%    $lG lG lG lG lGN lG lG lG lG lGr   