
    gB                     t   U d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(  e            rddl)m*Z* ndZ* ej+        e,          Z-er- e            Z.ee/ee
e/         e
e/         f         f         e0d<   n eg d e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rdnd e            rdndffddd e            rdndffd  e            rd!nddffd"d#d$ e            rd%nd e            rd&ndffd'd( e            rd)ndffd*d+d,d-d e            rdndffd.d/ e            rd0ndffd1d e            rd2ndffd3d4 e            rd5ndffd6d e            rdndffd7d8 e            rd9nd e            rd:ndffd;d< e            rd=nd e            rd>ndffd?d e            rdndffd@d4 e            rd5ndffdAdB e            rdCndffdDdB e            rdCndffdEdF e            rdGnd e            rdHndffdIdJ e            rdKndffdLd e            rdMndffdNdO e            rdPndffdQ e            rdRnd e            rdSndffdTdUdVdWd4 e            rd5ndffdXd/ e            rd0ndffdYdZ e            rd[ndffd\ e            rd]nd e            rd^ndffd_d` e            rdandffdbdc e            rddndffdedf e            rdgndffdhd e            rdndffdi e            rdjnddffdkdld e            rdmndffdnd e            rdondffdp e            rdqnddffdrdsdt e            rdundffdvdwdx e            rdyndffdz e            rd{nd e            rd|ndffd} e            rd{nd e            rd|ndffd~d e            rdndffdd e            rdmndffd e            rdnddffdd/ e            rd0ndffdd/ e            rd0ndffdd/ e            rd0ndffdd e            rdondffddd/ e            rd0ndffddd e            rdndffddB e            rdCndffdd e            rdndffddd4 e            rd5ndffdd e            rd>ndffdd= e            rd>ndffdd= e            rd>ndffdd/ e            rd0ndffdd/ e            rd0ndffd e            rd=nd e            rd>ndffd e            rd=nd e            rd>ndffdd e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffd e            rd=nd e            rd>ndffdd= e            rd>ndffdd= e            rd>ndffdd= e            rd>ndffdd= e            rd>ndffdd e            rdndffd e            rdnd e            rdndffddd e            rdndffd e            rdnddffdd e            rdondffdd e            rdondffd e            rdnddffd e            rdnd e            rdndffd e            rdnd e            rdndffdd4 e            rd5ndffdd e            rdndffdȑd e            rd=nd e            rd>ndffd e            rd=nd e            rd>ndffdd= e            rd>ndffd e            rdnddffdd e            rdndffdd e            rdmndffdd e            rdndffdd e            rdondffdd4 e            rd5ndffd e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdߑdd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdondffdd e            rdondffddB e            rdCndffddB e            rdCndffdd e            rdndffdd/ e            rd0ndffddB e            rdCndffddB e            rdCndffdd= e            rd>ndffd e            rd(nd e            rd)ndffd e            rd(nd e            rd)ndffdd e            rd=nd e            rd>ndffddJ e            rdKndffdd= e            rd>ndffdd= e            rd>ndffddd e            rdndffdd e            rdmndffd e            rdnddffddd e            rdndffdd  e            rdndffdd  e            rdndffdd  e            rdndffdd  e            rdndffddd e            rdndffd	 e            rd{nd e            rd|ndffd
 e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdndffdd4 e            rd5ndffdd4 e            rd5ndffddd e            rdndffdd e            rdondffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnddffd  e            rd!nddffd"d# e            rd$nddffd%d&d' e            rd(ndffd)d e            rdondffd*d/ e            rd0ndffd+ e            rdnd e            rdndffd, e            rdnd e            rdndffd-d.d/d0d e            rdndffd1 e            rd2nd e            rd3ndffd4 e            rdnd e            rdndffd5d= e            rd>ndffd6d e            rdndffd7d= e            rd>ndffd8d e            rdndffd9d:d;d<d=d>d? e            rd@ndffdAdB e            rdCndffdB e            rdCnd e            rdDndffdEdF e            rdGnddffdH e            rdnd e            rdndffdI e            rdnd e            rdndffdJ e            rdKnd e            rdLndffdM e            rdnd e            rdndffdN e            rdnd e            rdndffdO e            rd=nd e            rd>ndff          Z. e"e$e.          Z1dP  e$j2                    D             Z3dQe/fdRZ4	 	 	 	 	 	 	 	 dadUee/ej5        f         dVe
ee/ej5        f                  dWe6dXe
e6         dYe
e	e/e/f                  dZe
ee6e/f                  d[e
e/         d\e6d]e/fd^Z7 G d_ d`          Z8dS (b  zAuto Tokenizer class.    N)OrderedDict)TYPE_CHECKINGDictOptionalTupleUnion   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)PreTrainedTokenizer)TOKENIZER_CONFIG_FILE)cached_fileextract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)PreTrainedTokenizerFastTOKENIZER_MAPPING_NAMESalbertAlbertTokenizerAlbertTokenizerFastalignBertTokenizerBertTokenizerFastbark)bart)BartTokenizerBartTokenizerFastbarthezBarthezTokenizerBarthezTokenizerFast)bartpho)BartphoTokenizerNbertzbert-generationBertGenerationTokenizer)zbert-japanese)BertJapaneseTokenizerN)bertweet)BertweetTokenizerNbig_birdBigBirdTokenizerBigBirdTokenizerFastbigbird_pegasusPegasusTokenizerPegasusTokenizerFast)biogpt)BioGptTokenizerN)
blenderbot)BlenderbotTokenizerBlenderbotTokenizerFast)zblenderbot-small)BlenderbotSmallTokenizerNblipzblip-2GPT2TokenizerGPT2TokenizerFastbloomBloomTokenizerFastbridgetowerRobertaTokenizerRobertaTokenizerFastbros)byt5)ByT5TokenizerN	camembertCamembertTokenizerCamembertTokenizerFast)canine)CanineTokenizerN	chameleonLlamaTokenizerLlamaTokenizerFastchinese_clipclapclipCLIPTokenizerCLIPTokenizerFastclipseg)clvp)ClvpTokenizerN
code_llamaCodeLlamaTokenizerCodeLlamaTokenizerFastcodegenCodeGenTokenizerCodeGenTokenizerFastcohereCohereTokenizerFastconvbertConvBertTokenizerConvBertTokenizerFastcpmCpmTokenizerCpmTokenizerFast)cpmant)CpmAntTokenizerN)ctrl)CTRLTokenizerN)zdata2vec-audioWav2Vec2CTCTokenizerNzdata2vec-textdbrxdebertaDebertaTokenizerDebertaTokenizerFastz
deberta-v2DebertaV2TokenizerDebertaV2TokenizerFast
distilbertDistilBertTokenizerDistilBertTokenizerFastdprDPRQuestionEncoderTokenizerDPRQuestionEncoderTokenizerFastelectraElectraTokenizerElectraTokenizerFasternieernie_mErnieMTokenizer)esm)EsmTokenizerNfalconr   falcon_mambaGPTNeoXTokenizerFastfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubert)FlaubertTokenizerNfnetFNetTokenizerFNetTokenizerFast)fsmt)FSMTTokenizerNfunnelFunnelTokenizerFunnelTokenizerFastgemmaGemmaTokenizerGemmaTokenizerFastgemma2gitglmzgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japanese)GPTNeoXJapaneseTokenizerNgptj)zgptsan-japanese)GPTSanJapaneseTokenizerNzgrounding-dinogroupvitherbertHerbertTokenizerHerbertTokenizerFast)hubertrn   ibertideficsidefics2idefics3instructblipinstructblipvideojambajetmoe)jukebox)JukeboxTokenizerNzkosmos-2XLMRobertaTokenizerXLMRobertaTokenizerFastlayoutlmLayoutLMTokenizerLayoutLMTokenizerFast
layoutlmv2LayoutLMv2TokenizerLayoutLMv2TokenizerFast
layoutlmv3LayoutLMv3TokenizerLayoutLMv3TokenizerFast	layoutxlmLayoutXLMTokenizerLayoutXLMTokenizerFastledLEDTokenizerLEDTokenizerFastliltllamallava
llava_nextllava_next_videollava_onevision
longformerLongformerTokenizerLongformerTokenizerFastlongt5T5TokenizerT5TokenizerFast)luke)LukeTokenizerNlxmertLxmertTokenizerLxmertTokenizerFastm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermbartMBartTokenizerMBartTokenizerFastmbart50MBart50TokenizerMBart50TokenizerFastmegazmegatron-bert)zmgp-str)MgpstrTokenizerNmistralmixtralmllamamlukeMLukeTokenizer
mobilebertMobileBertTokenizerMobileBertTokenizerFastmoshimpnetMPNetTokenizerMPNetTokenizerFastmptmramt5MT5TokenizerMT5TokenizerFastmusicgenmusicgen_melodymvpMvpTokenizerMvpTokenizerFast)myt5)MyT5TokenizerNnezhanllbNllbTokenizerNllbTokenizerFastznllb-moenystromformerolmoolmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizerOpenAIGPTTokenizerFastoptowlv2owlvit	paligemmapegasus	pegasus_x)	perceiver)PerceiverTokenizerN	persimmonphiphi3phimoe)phobert)PhobertTokenizerN
pix2structpixtralplbartPLBartTokenizer)
prophetnet)ProphetNetTokenizerNqdqbertqwen2Qwen2TokenizerQwen2TokenizerFastqwen2_audio	qwen2_moeqwen2_vl)rag)RagTokenizerNrealmRealmTokenizerRealmTokenizerFastrecurrent_gemmareformerReformerTokenizerReformerTokenizerFastrembertRemBertTokenizerRemBertTokenizerFast	retribertRetriBertTokenizerRetriBertTokenizerFastrobertazroberta-prelayernorm)roc_bert)RoCBertTokenizerNroformerRoFormerTokenizerRoFormerTokenizerFastrwkvseamless_m4tSeamlessM4TTokenizerSeamlessM4TTokenizerFastseamless_m4t_v2siglipSiglipTokenizerspeech_to_textSpeech2TextTokenizer)speech_to_text_2)Speech2Text2TokenizerNspeecht5SpeechT5Tokenizer)splinter)SplinterTokenizerSplinterTokenizerFastsqueezebertSqueezeBertTokenizerSqueezeBertTokenizerFaststablelm
starcoder2switch_transformerst5)tapas)TapasTokenizerN)tapex)TapexTokenizerN)z
transfo-xl)TransfoXLTokenizerNtvpudopUdopTokenizerUdopTokenizerFastumt5video_llavaviltvipllavavisual_bert)vits)VitsTokenizerN)wav2vec2rn   )zwav2vec2-bertrn   )zwav2vec2-conformerrn   )wav2vec2_phoneme)Wav2Vec2PhonemeCTCTokenizerNwhisperWhisperTokenizerWhisperTokenizerFastxclipxglmXGLMTokenizerXGLMTokenizerFast)xlm)XLMTokenizerNzxlm-prophetnetXLMProphetNetTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerXLNetTokenizerFastxmodyosozambac                     i | ]\  }}||	S  rn  ).0kvs      f/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py
<dictcomp>rs  >  s    @@@41a!Q@@@    
class_namec                    | dk    rt           S t                                          D ]S\  }}| |v rJt          |          }t	          j        d| d          }	 t          ||           c S # t          $ r Y Ow xY wTt          j	                                        D ]%\  }}|D ]}t          |dd           | k    r|c c S &t	          j        d          }t          ||           rt          ||           S d S )Nr   .ztransformers.models__name__transformers)r   r    itemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contenthasattr)ru  module_name
tokenizersmoduleconfig	tokenizermain_modules          rr  tokenizer_class_from_namer  A  sI   ...&&#:#@#@#B#B  Z##3K@@K,->->->@UVVFvz22222!    $ 0>DDFF ! !
# 	! 	!Iy*d33zAA       B	! ).99K{J'' 0{J///4s   A**
A76A7F pretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_only	subfolderc	                    |	                     dd          }
|
-t          j        dt                     |t	          d          |
}|	                    dd          }t          | t          ||||||||ddd|          }|t          	                    d           i S t          ||          }t          |d	
          5 }t          j        |          }ddd           n# 1 swxY w Y   ||d<   |S )a	  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`._commit_hashF)r  r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  z\Could not locate the tokenizer configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorgetr   r   loggerinfor   openjsonload)r  r  r  r  r  r  r  r  r  kwargsr  commit_hashresolved_config_filereaderresults                  rr  get_tokenizer_configr  ]  s^   R ZZ 0$77N! A	
 	
 	
 uvvv**^T22K&%%'))..305     #rsss	%&:KHHK	"W	5	5	5 #6""# # # # # # # # # # # # # # #(F>Ms   =CC"%C"c                   X    e Zd ZdZd Ze ee          d                         ZddZ	dS )AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                      t          d          )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)EnvironmentError)selfs    rr  __init__zAutoTokenizer.__init__  s    _
 
 	
rt  c           
      v
   |                     dd          }|Dt          j        dt                     |                    dd          t          d          ||d<   |                     dd          }d|d<   |                     d	d          }|                     d
d          }|                     dd          }|                    dd          }	|d}
t                              |d          }|Jt          d| dd                    d t                                          D                        d          |\  }}|r,|t          |          }
nt                              d           |
t          |          }
|
t          d| d           |
j        |g|R i |S t          |fi |}d|v r|d         |d<   |                    d          }d}d|v rGt          |d         t          t           f          r	|d         }n|d                             dd          }|t          |t"                    sM|	r7t%          ||	fi |}t'          |d          d         }t)          j        d'i |}nt)          j        |fd|i|}|j        }t/          |d          rd|j        v r|j        d         }|du}t3          |          t4          v p(|duo$t          |          dupt          |dz             du}t7          ||||          }|r|r|r|d         	|d         }n|d         }t9          ||fi |}
|                     dd          }t:          j                            |          r|
                                   |
j        |g|R d|i|S |fd}
|r)|!                    d          s| d}t          |          }
|
|}t          |          }
|
t          d| d           |
j        |g|R i |S t          |tD                    rdt3          |j#                  t3          |j$                  ur5t                              d |j$        j%         d!|j#        j%         d"           |j$        }tM          t3          |          j'                  }|Vt4          t3          |                   \  }}|r|s| |j        |g|R i |S | |j        |g|R i |S t          d#          t          d$|j%         d%d                    d& t4                                          D                        d          )(a]  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```r  Nr  r  r  r  T
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3      K   | ]}|V  d S Nrn  ro  cs     rr  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>C  s"       K Kq K K K K K Krt  rw  zt`use_fast` is set to `True` but the tokenizer class does not have a fast version.  Falling back to the slow version.zTokenizer class z is not currently imported.r  tokenizer_classauto_mapr  F)return_tensorsFastr   r   code_revisionz- does not exist or is not currently imported.z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.zzThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   $   K   | ]}|j         V  d S r  )rx  r  s     rr  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s$      4b4bAQZ4b4b4b4b4b4brt  rn  )(r  r  r  r  r  r  r    joinkeysr  r  warningfrom_pretrainedr  
isinstancetuplelistr
   r   r   r   	for_modelr  r  r  typer  r   r   ospathisdirregister_for_auto_classendswithr   decoderencoder	__class__r   rx  )clsr  inputsr  r  r  r  r  r  r  r  tokenizer_class_tupletokenizer_class_nametokenizer_fast_class_nametokenizer_configconfig_tokenizer_classtokenizer_auto_map	gguf_pathconfig_dicthas_remote_codehas_local_code	class_ref_tokenizer_class_candidate
model_typetokenizer_class_pytokenizer_class_fasts                              rr  r  zAutoTokenizer.from_pretrained  s   Z  $4d;;%M E   zz'4((4 l   -F7OHd++#|::j$//$4d;;"JJ':DAAJJ{D11	 %"O$;$?$?PT$U$U!$, O~ O Oyy K K,C,H,H,J,J K K KKKO O O  
 ?T; "; ,8&?@Y&Z&ZOONN=   &";<P"Q"Q& !e4H!e!e!efff2?23PdSYddd]cddd 00MXXQWXX---%5n%EF>"!1!5!56G!H!H!)))*:6FF ]%5j%A""%5j%A%E%EoW[%\%\" ")f&677   +,I9 _ _X^ _ _I"6yQV"W"W"WX`"aK'1@@K@@FF'75 IZ^d F &,%;"vz** F&//Q/Q%+__%E",D8f):: 
"$. )*@AAM Z,-Cf-LMMUYY	 	 6<no
 
  	e0 	e 2.q1=.q1		.q1	;IGdoohnooO

?D11Aw}}:;; :779992?2-06  J[_e   $/"O W 6 ? ? G G W/E,K,K,K)";<U"V"V&,B)";<U"V"V& o'@ooo   3?23PdSYddd]cddd f233 	$FN##4+?+???2v~7O 2 2%+^%=2 2 2   ^F/V0EFF
!7Hf7V4 4# 	 	5G5O;+;<Ym\bmmmflmmm%1=-=>[o^dooohnooo$:  
 f0@ f f+/994b4bIZI_I_IaIa4b4b4b+b+bf f f
 
 	
rt  NFc                    ||t          d          |$t          |t                    rt          d          |$t          |t                    rt          d          |=|;t          |t                    r&|j        |k    rt          d|j         d| d          | t
          j        v rt
          |          \  }}||}||}t
                              | ||f|           dS )	a  
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        NzKYou need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_classz:You passed a fast tokenizer in the `slow_tokenizer_class`.z:You passed a slow tokenizer in the `fast_tokenizer_class`.zThe fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not consistent with the slow tokenizer class you passed (fast tokenizer has z and you passed z!. Fix one of those so they match!)exist_ok)r  
issubclassr   r   slow_tokenizer_classr  r  register)config_classr  fast_tokenizer_classr  existing_slowexisting_fasts         rr  r  zAutoTokenizer.register  s6     ',@,Hjkkk+
;OQh0i0i+YZZZ+
;OQd0e0e+YZZZ !,$0/1HII 1$9=QQQ!'<! !Nb! ! !   ,;;;+<\+J(M=#+'4$#+'4$""<2FH\1]hp"qqqqqrt  )NNF)
rx  
__module____qualname____doc__r  classmethodr   r    r  r  rn  rt  rr  r  r    s~         
 
 
 &&'>??\
 \
 @? [\
|)r )r )r )r )r )rrt  r  )NFNNNNFr  )9r  r{  r  r  r  collectionsr   typingr   r   r   r   r   configuration_utilsr
   dynamic_module_utilsr   r   modeling_gguf_pytorch_utilsr   tokenization_utilsr   tokenization_utils_baser   utilsr   r   r   r   r   r   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_fastr   
get_loggerrx  r  r    str__annotations__r  rz  CONFIG_TO_TYPEr  PathLikeboolr  r  rn  rt  rr  <module>r     s%           				  # # # # # # > > > > > > > > > > > > > > 3 3 3 3 3 3 \ \ \ \ \ \ \ \ ? ? ? ? ? ? 5 5 5 5 5 5 < < < < < <                3 2 2 2 2 2 * * * * * *               #BBBBBBB" 
	H	%	% B VaU`UbUb[eHSM8C=4P.Q)QRbbbb)k{	
)C)C)E)EO%%4-D-D-F-FP))D{	
 ?V?V?X?X(b(;(;^bcd{	
 o>U>U>W>W'a':':]abc{	
 ={	
 *D*D*F*FP&&D.E.E.G.GQ**T{	
$ 4%{	
& o>U>U>W>W'a':':]abc'{	
( >X>X>Z>Z!d!:!:`dfj kl){	
* ?+{	
, 6-{	
0 *D*D*F*FP&&D.E.E.G.GQ**T/{	
< !3OfOfOhOh5r5K5Knr st={	
> 2?{	
@ OA{	
B EC{	
D o>U>U>W>W'a':':]abcE{	
F @W@W@Y@Y)c)<)<_cdeG{	
H t5L5L5N5NX11TXYZI{	
J /KbKbKdKd1n1G1GjnopK{	
L o>U>U>W>W'a':':]abcM{	
N .O{	
R ,F,F,H,HR((d0G0G0I0IS,,tQ{	
^ 2_{	
b (B(B(D(DN$$$,C,C,E,EO((4a{	
n oF]F]F_F_/i/B/Beijko{	
r &.E.E.G.GQ**Tq{	
@ #+B+B+D+DN''${	
N #+B+B+D+DN''$M{	
Z .[{	
^ ,F,F,H,HR((d0G0G0I0IS,,t]{	
j +G^G^G`G`-j-C-Cfjklk{	
l 7N7N7P7PZ33VZ[\m{	
n -JaJaJcJc/m/F/Fimnoo{	
r &@&@&B&BLNN*A*A*C*CM&&q{	
~ 2{	
@ .A{	
B ?C{	
D 1MdMdMfMf3p3I3IlpqrE{	
F o>U>U>W>W'a':':]abcG{	
H +G^G^G`G`-j-C-CfjklI{	
L ,F,F,H,HR((d0G0G0I0IS,,tK{	
X 1PgPgPiPi3s3L3LostuY{	
\ 19P9P9R9R\55X\[{	
h +G^G^G`G`-j-C-Cfjkli{	
j ?V?V?X?X(b(;(;^bcdk{	
l .H.H.J.JT**PTVZ[\m{	
n ,o{	
p ;R;R;T;T^77Z^_`q{	
r d>U>U>W>W$a$:$:]abcs{	
v (4G4G4I4IS00tUYZu{	
| 6}{	
~ o>U>U>W>W'a':':]abc{	
@ .A{	
B )D[D[D]D]+g+@+@cghiC{	
F (B(B(D(DN$$$,C,C,E,EO((4E{	
T (B(B(D(DN$$$,C,C,E,EO((4S{	
` _=T=T=V=V&`&9&9\`aba{	
b T8O8O8Q8Q[44W[\]c{	
d .H.H.J.JT**PTVZ[\e{	
f o>U>U>W>W'a':':]abcg{	
h _E\E\E^E^.h.A.Adhiji{	
j AXAXAZAZ*d*=*=`defk{	
l $:Q:Q:S:S ] 6 6Y]^_m{	
n Fo{	
p o>U>U>W>W'a':':]abcq{	
r Cs{	
t H_H_HaHa1k1D1Dgklmu{	
v /BYBYB[B[+e+>+>aefgw{	
x +G^G^G`G`-j-C-Cfjkly{	
z 7{{	
| )E\E\E^E^+h+A+Adhij}{	
~ 7N7N7P7PZ33VZ[\{	
@ *D[D[D]D],g,@,@cghiA{	
B *D[D[D]D],g,@,@cghiC{	
D oF]F]F_F_/i/B/BeijkE{	
F !?KbKbKdKd4n4G4Gjn"opG{	
J (B(B(D(DN$$$,C,C,E,EO((4I{	
X (B(B(D(DN$$$,C,C,E,EO((4W{	
d 4e{	
h -G-G-I-IS))t1H1H1J1JT--PTg{	
t -JaJaJcJc/m/F/Fimnou{	
v 1PgPgPiPi3s3L3Lostuw{	
x 1PgPgPiPi3s3L3Lostuy{	
z /MdMdMfMf1p1I1Ilpqr{{	
| ^;R;R;T;T%^%7%7Z^_`}{	
~ +JaJaJcJc-m-F-Fimno{	
B (B(B(D(DN$$$,C,C,E,EO((4A{	
N 'AXAXAZAZ)d)=)=`defO{	
P ,F]F]F_F_.i.B.BeijkQ{	
R  "2LcLcLeLe4o4H4Hko!pqS{	
T !1KbKbKdKd3n3G3Gjn opU{	
V 1PgPgPiPi3s3L3LostuW{	
Z %?%?%A%AKMMt)@)@)B)BL%%Y{	
f .g{	
h )D[D[D]D]+g+@+@cghii{	
j .H.H.J.JT**PTVZ[\k{	
l t7N7N7P7PZ33VZ[\m{	
n 8O8O8Q8Q[44W[\]o{	
p -G-G-I-IS))tUYZ[q{	
t (B(B(D(DN$$$,C,C,E,EO((4s{	
B *D*D*F*FP&&D.E.E.G.GQ**TA{	
N (D[D[D]D]*g*@*@cghiO{	
P G^G^G`G`0j0C0CfjklQ{	
R 3S{	
V (B(B(D(DN$$$,C,C,E,EO((4U{	
d (B(B(D(DN$$$,C,C,E,EO((4c{	
p (BYBYB[B[*e*>*>aefgq{	
r +E+E+G+GQ''TSWXYs{	
t 1PgPgPiPi3s3L3Lostuu{	
v t:Q:Q:S:S]66Y]^_w{	
x 'AXAXAZAZ)d)=)=`defy{	
z T5L5L5N5NX11TXYZ{{	
| 'CZCZC\C\)f)?)?bfgh}{	
@ &@&@&B&BLNN*A*A*C*CM&&{	
L ->U>U>W>W)a):):]abcM{	
N E\E\E^E^0h0A0Adh ijO{	
P ^;R;R;T;T%^%7%7Z^_`Q{	
R .S{	
T ?V?V?X?X(b(;(;^bcdU{	
X 'A'A'C'CMOO+B+B+D+DN''$W{	
f 'A'A'C'CMOO+B+B+D+DN''$e{	
t  )C)C)E)EO%%4-D-D-F-FP))Ds{	
@	 d6M6M6O6OY22UYZ[A	{	
B	 t7N7N7P7PZ33VZ[\C	{	
F	  9P9P9R9R"\"5"5X\]E	{	
L	 ?CZCZC\C\,f,?,?bfghM	{	
P	 %CZCZC\C\'f'?'?bfgO	{	
V	 _=T=T=V=V&`&9&9\`abW	{	
X	 ?V?V?X?X(b(;(;^bcdY	{	
Z	 @W@W@Y@Y)c)<)<_cde[	{	
\	 +E\E\E^E^-h-A-Adhij]	{	
`	 *D*D*F*FP&&D.E.E.G.GQ**T_	{	
n	 *D*D*F*FP&&D.E.E.G.GQ**Tm	{	
z	{	{	
J
 (B(B(D(DN$$$,C,C,E,EO((4I
{	
V
 'CZCZC\C\)f)?)?bfghW
{	
X
 &@W@W@Y@Y(c(<(<_cdeY
{	
Z
 (BYBYB[B[*e*>*>aefg[
{	
\
 4]
{	
^
 M@W@W@Y@Y+c+<+<_cde_
{	
`
 <S<S<U<U_88[_`aa
{	
b
 -G-G-I-IS))tUYZ[c
{	
d
 :e
{	
f
 AXAXAZAZ*d*=*=`defg
{	
j
 $,C,C,E,EO((4i
{	
v
 -G^G^G`G`/j/C/Cfjklw
{	
z
 $,C,C,E,EO((4y
{	
F *D[D[D]D],g,@,@cghiG{	
H ,I{	
J 'AXAXAZAZ)d)=)=`defK{	
N "(B(B(D(DN$$$,C,C,E,EO((4M{	
\ +E+E+G+GQ''T/F/F/H/HR++d[{	
j *D*D*F*FP&&D.E.E.G.GQ**Ti{	
v /MdMdMfMf1p1I1Ilpqrw{	
x +G^G^G`G`-j-C-Cfjkly{	
| '#?V?V?X?X%b%;%;^bc{{	
B 5C{	
D -JaJaJcJc/m/F/FimnoE{	
F d6M6M6O6OY22UYZ[G{	
J .H.H.J.JT**PT2I2I2K2KU..QUI{	
X ".H.H.J.JT**PT2I2I2K2KU..QUW{	
d -G-G-I-IS))tUYZ[e{	
f :T:T:V:V ` 6 6\`bfghg{	
h Bi{	
j 1K1K1M1MW--SWY]^_k{	
l Im{	
p 'G^G^G`G`)j)C)Cfjko{	
v $:Q:Q:S:S ] 6 6Y]^_w{	
x OD[D[D]D]-g-@-@cghiy{	
| &%?%?%A%AKMMt)@)@)B)BL%%{{	
J %?%?%A%AKMMt)@)@)B)BL%%I{	
V 0W{	
X 0Y{	
Z 9[{	
\ _=T=T=V=V&`&9&9\`ab]{	
` 'A'A'C'CMOO+B+B+D+DN''$_{	
n %?%?%A%AKMMt)@)@)B)BL%%m{	
z -G^G^G`G`/j/C/Cfjkl{{	
| o>U>U>W>W'a':':]abc}{	
~ *D[D[D]D],g,@,@cghi{	
@ _E\E\E^E^.h.A.AdhijA{	
B .C{	
D 9E{	
F >G{	
H CI{	
J HK{	
L +G^G^G`G`-j-C-CfjklM{	
N ?V?V?X?X(b(;(;^bcdO{	
R 'A'A'C'CMOO+B+B+D+DN''$Q{	
^ ,_{	
` <V<V<X<X b 8 8^bdhija{	
d -G-G-I-IS))t1H1H1J1JT--PTc{	
r !-G-G-I-IS))t1H1H1J1JT--PTq{	
@ (B(B(D(DN$$$,C,C,E,EO((4{	
N -G-G-I-IS))t1H1H1J1JT--PTM{	
\ )C)C)E)EO%%4-D-D-F-FP))D[{	
j (B(B(D(DN$$$,C,C,E,EO((4i{	
} }~ %$%9;RSS @@#=#7#=#?#?@@@#    < 48 &*(,(,""l l#(bk)9#:lc2;./0l l d^	l
 d38n%l E$)$%l sml l l l l l^Wr Wr Wr Wr Wr Wr Wr Wr Wr Wrrt  