
    g                         d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ  e	j        e          Z G d	 d
          ZdS )zTokenization classes for RAG.    N)ListOptional   )BatchEncoding)logging   )	RagConfigc                       e Zd Zd Zd Zed             Zd Zd Zd Z	d Z
d Z	 	 	 	 	 	 ddee         deee                  dee         dee         dedededefdZd	S )RagTokenizerc                 :    || _         || _        | j         | _        d S N)question_encoder	generatorcurrent_tokenizer)selfr   r   s      d/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/rag/tokenization_rag.py__init__zRagTokenizer.__init__   s"     0"!%!6    c                 ~   t           j                            |          rt          d| d          t          j        |d           t           j                            |d          }t           j                            |d          }| j                            |           | j                            |           d S )NzProvided path (z#) should be a directory, not a fileT)exist_okquestion_encoder_tokenizergenerator_tokenizer)	ospathisfile
ValueErrormakedirsjoinr   save_pretrainedr   )r   save_directoryquestion_encoder_pathgenerator_paths       r   r   zRagTokenizer.save_pretrained#   s    7>>.)) 	db~bbbccc
NT2222 "^=Y Z Zn6KLL--.CDDD&&~66666r   c                     ddl m} |                    dd           }|t          j        |          }|                    ||j        d          }|                    ||j        d          } | ||          S )N   )AutoTokenizerconfigr   )r&   	subfolderr   )r   r   )auto.tokenization_autor%   popr	   from_pretrainedr   r   )clspretrained_model_name_or_pathkwargsr%   r&   r   r   s          r   r*   zRagTokenizer.from_pretrained,   s     	;:::::Hd++>./LMMF(88)&2IUq 9 
 
 "11)&2BNc 2 
 
	 s$4	JJJJr   c                      | j         |i |S r   )r   r   argsr-   s      r   __call__zRagTokenizer.__call__>   s    %t%t6v666r   c                 &     | j         j        |i |S r   )r   batch_decoder/   s      r   r3   zRagTokenizer.batch_decodeA   s    *t~*D;F;;;r   c                 &     | j         j        |i |S r   )r   decoder/   s      r   r5   zRagTokenizer.decodeD   s    $t~$d5f555r   c                     | j         | _        d S r   )r   r   r   s    r   _switch_to_input_modez"RagTokenizer._switch_to_input_modeG   s    !%!6r   c                     | j         | _        d S r   )r   r   r7   s    r   _switch_to_target_modez#RagTokenizer._switch_to_target_modeJ   s    !%r   NlongestT	src_texts	tgt_texts
max_lengthmax_target_lengthpaddingreturn_tensors
truncationreturnc           
          t          j        dt                     || j        j        } | |fd||||d|}	||	S || j        j        } | d|d||||d|}
|
d         |	d<   |	S )Nu4  `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more detailsT)add_special_tokensrA   r>   r@   rB   )text_targetrE   rA   r@   r>   rB   	input_idslabels )warningswarnFutureWarningr   model_max_length)r   r<   r=   r>   r?   r@   rA   rB   r-   model_inputsrH   s              r   prepare_seq2seq_batchz"RagTokenizer.prepare_seq2seq_batchM   s     	 	
 	
 	
 /@Jt
#)!!
 
 
 
 $ $ 6 G 
!#)(!
 
 
 
 "(!4Xr   )NNNr;   NT)__name__
__module____qualname__r   r   classmethodr*   r1   r3   r5   r8   r:   r   strr   intboolr   rO   rI   r   r   r   r      s+       7 7 7
7 7 7 K K [K"7 7 7< < <6 6 67 7 70 0 0 *.$(+/ ", ,9, DI&, SM	,
 $C=, , , , 
, , , , , ,r   r   )__doc__r   rJ   typingr   r   tokenization_utils_baser   utilsr   configuration_ragr	   
get_loggerrP   loggerr   rI   r   r   <module>r^      s    $ # 				  ! ! ! ! ! ! ! ! 4 4 4 4 4 4       ( ( ( ( ( ( 
	H	%	%\ \ \ \ \ \ \ \ \ \r   