
    Ngx\                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ  ej        e          ZddZ G d dej                  ZdS )    )annotationsNfnmatch)Path)AnyCallable)nn)
AutoConfig	AutoModelAutoTokenizer	MT5ConfigT5Config)is_peft_available)find_adapter_config_file_save_pretrained_fnr   	subfolderstrreturnCallable[..., None]c                     d fd}|S )Nsave_directory
str | Pathr   Nonec                    t          j        t          |           z  d            t          |           z  fi |S )NT)exist_ok)osmakedirsr   )r   kwargsr   r   s     d/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/models/Transformer.pywrapperz)_save_pretrained_wrapper.<locals>.wrapper   sK    
D((94tDDDD""4#7#7)#CNNvNNN    )r   r   r   r    )r   r   r    s   `` r   _save_pretrained_wrapperr#      s5    O O O O O O O Nr!   c                       e Zd ZU dZdZded<   	 	 	 	 	 	 	 	 d:d; fdZd<dZd=dZd=dZ	d=dZ
d=dZd>d%Zd?d&Zd=d'Zd=d(Zd@d)ZdAd,ZdBd.Z	 dCdDd3ZdEd4ZdCdFd7ZedGd9            Z xZS )HTransformera  Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
    Tboolsave_in_rootNFtorchmodel_name_or_pathr   max_seq_length
int | None
model_argsdict[str, Any] | Nonetokenizer_argsconfig_args	cache_dir
str | Nonedo_lower_casetokenizer_name_or_pathbackendr   r   c
                d   t                                                       ddg| _        || _        |	| _        |i }|i }|i }|                     |||	|          }
 | j        ||
||	fi | |	d|vr||d<   t          j        ||n|fd|i|| _	        |mt          | j        d          rXt          | j        j        d          r>t          | j	        d          r)t          | j        j        j        | j	        j                  }|| _        |"| j	        j        j        | j        j        _        d S d S )Nr*   r2   model_max_lengthr0   configmax_position_embeddings)super__init__config_keysr2   r4   _load_config_load_modelr   from_pretrained	tokenizerhasattr
auto_modelr7   minr8   r6   r*   	__class____name__tokenizer_class)selfr)   r*   r,   r.   r/   r0   r2   r3   r4   r7   rC   s              r   r:   zTransformer.__init__6   s    	,o>*J!NK""#5y';WW+VYVV:VVV%*<N*R*R1?N-.&6&<&H""N`
 

 
 
 !22vDO24MNNv DN,>??v
 "%T_%;%SUYUcUt!u!u,!-59^5M5VDO"222 .-r!   dict[str, Any]c           	     ^   t          ||                    d          |                    d          |                    dd                    	 Jt                      st          d          |dk    rt	          d	          d
dlm}  |j        |fi |d|iS t          j        |fi |d|iS )z"Loads the configuration of a modeltokenrevisionlocal_files_onlyF)rI   rJ   rK   NzgLoading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`.r(   a  PEFT models can currently only be loaded with the `torch` backend. To use other backends, load the model with `backend="torch"`, call `model[0].auto_model.merge_and_unload()`, save that model with `model.save_pretrained()` and then load the model with the desired backend.r   )
PeftConfigr0   )	r   getr   	Exception
ValueErrorpeftrL   r>   r
   )rF   r)   r0   r4   r/   rL   s         r   r<   zTransformer._load_configf   s     %"!oog..$44!,1CU!K!K	    %&& }   '!! w  
 ('''''-:-.@eeKee[deeee)*<aaaaW`aaaar!   c                   |dk    rxt          |t                    r | j        |||fi | n@t          |t                    r | j        |||fi | nt          j        |f||d|| _         | j        |||fi | dS |dk    r | j	        |||fi | dS |dk    r | j
        |||fi | dS t          d| d          )zLoads the transformer modelr(   r7   r0   onnxopenvinozUnsupported backend 'z6'. `backend` should be `torch`, `onnx`, or `openvino`.N)
isinstancer   _load_t5_modelr   _load_mt5_modelr   r>   rA   _load_peft_model_load_onnx_model_load_openvino_modelrO   )rF   r)   r7   r0   r4   r,   s         r   r=   zTransformer._load_model   sI   g&(++ ##$6	XXZXXXXFI.. $$%7YYjYYYY"+";&#/5# #NX# # "D!"4fiVV:VVVVV!D!"4fiVV:VVVVV
""%D%&8&)ZZzZZZZZtWtttuuur!   c                    t                      r6ddlm}m} t	          ||          r  |j        | j        |f||d|| _        d S d S d S )Nr   )rL   	PeftModelrR   )r   rP   rL   r\   rU   r>   rA   )rF   r)   r7   r0   r,   rL   r\   s          r   rX   zTransformer._load_peft_model   s     	22222222&*-- ";)";O%7#@FR[# #_i# #		 	 r!   c                j   t          |t                    st          |t                    rt          d          	 ddlm} ddlm} n# t          $ r t          d          w xY wt          |          }|                                }d}	d}
|                     |||||
|	          \  }}|r|                    dd            d	|v r|d	         }t          |t                    spt          |                                          st          d
          t          |d          5 }t!          j        |          |d	<   d d d            n# 1 swxY w Y   ni |d	<    |j        |f|||d|| _        t)          | j        j        | j                  | j        _        |r|                     |||	           d S d S )Nz8T5 models are not yet supported by the OpenVINO backend.r   )OVModelForFeatureExtraction)OV_XML_FILE_NAMEzUsing the OpenVINO backend requires installing Optimum and OpenVINO. You can install them with pip: `pip install optimum[openvino]`.OpenVINOzopenvino*.xml	file_name	ov_configzXov_config should be a dictionary or a path to a .json file containing an OpenVINO configzutf-8)encodingr7   r0   export)rU   r   r   rO   optimum.intelr^   optimum.intel.openvinor_   ModuleNotFoundErrorrN   r   exists_backend_should_exportpopdictopenjsonloadr>   rA   r#   _save_pretrainedr4   _backend_warn_to_save)rF   r)   r7   r0   r,   r^   r_   	load_pathis_localbackend_nametarget_file_globre   rb   fs                 r   rZ   z Transformer._load_openvino_model   sy   fh'' 	Y:fi+H+H 	YWXXX	AAAAAA???????" 	 	 	R  	 +,,	##%%!* "88x-=?OQ]
 


  	.NN;--- *$$";/Ii.. ;I--// $r   )g666 ;!.2illJ{+; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ')J{# 8c7R7b8
	8
 8

 8
 8
 ,DDODdfjfr+s+s(  	S&&'98\RRRRR	S 	Ss   A A"#EEEc                ,   	 dd l }ddlm}m} n# t          $ r t          d          w xY w|                    d|                                d                   |d<   t          |          }|	                                }	d}
d}| 
                    ||	||||
          \  }}|r|                    dd             |j        |f|||d|| _        t          | j        j        | j                  | j        _        |r|                     ||	|
           d S d S )	Nr   )ONNX_WEIGHTS_NAMEORTModelForFeatureExtractionzUsing the ONNX backend requires installing Optimum and ONNX Runtime. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`providerONNXz*.onnxra   rd   )onnxruntimeoptimum.onnxruntimerx   ry   rh   rN   rk   get_available_providersr   ri   rj   r>   rA   r#   rp   r4   rq   )rF   r)   r7   r0   r,   ortrx   ry   rr   rs   rt   ru   re   s                r   rY   zTransformer._load_onnx_model   s   	%%%%[[[[[[[[[" 	 	 	<  	 ",
C<W<W<Y<YZ[<\!]!]
:+,,	##%%# "88x->@PR^
 


  	.NN;--- 9e8T8d9
	9
 9

 9
 9
 ,DDODdfjfr+s+s(  	S&&'98\RRRRR	S 	Ss    )rr   r   rs   target_file_nameru   rt   tuple[bool, dict[str, Any]]c                ~   |                     dd          }|r||fS |                    d|          }|                    dd          }	|	r"t          |	|                                          n t          |                                          }
|	r(t          |	| j        |                                          n&t          | j        |                                          }|	r|	 d| nd| |r"fd                              D             }n`t          j                                        d|                    d	d          |                    d
d                    }fd|D             }|
|v }|sid|vre||v }|r_t          |          dk    r=d|vr9t          
                    d| d                                d| d|d	           | j        |d<   ||d<   || }t          |          j        }t          |          dk    rI|d         |d<   t          |                    dd          g|dd         R                                  |d<   |rbt          
                    d|d                                d| d           |r*t          
                    d| d| d|d          d           ||fS )a  
        Determines whether the model should be exported to the backend, or if it can be loaded directly.
        Also update the `file_name` and `subfolder` model_args if necessary.

        These are the cases:

        1. If export is set in model_args, just return export
        2. If `<subfolder>/<file_name>` exists; set export to False
        3. If `<backend>/<file_name>` exists; set export to False and set subfolder to the backend (e.g. "onnx")
        4. If `<file_name>` contains a folder, add those folders to the subfolder and set the file_name to the last part

        We will warn if:

        1. The expected file does not exist in the model directory given the optional file_name and subfolder.
           If there are valid files for this backend, but they're don't align with file_name, then we give a useful warning.
        2. Multiple files are found in the model directory that match the target file name and the user did not
           specify the desired file name via `model_kwargs={"file_name": "<file_name>"}`

        Args:
            load_path: The model repository or directory, as a Path instance
            is_local: Whether the model is local or remote, i.e. whether load_path is a local directory
            model_args: The model_args dictionary. Notable keys are "export", "file_name", and "subfolder"
            target_file_name: The expected file name in the model directory, e.g. "model.onnx" or "openvino_model.xml"
            target_file_glob: The glob pattern to match the target file name, e.g. "*.onnx" or "openvino*.xml"
            backend_name: The human-readable name of the backend for use in warnings, e.g. "ONNX" or "OpenVINO"

        Returns:
            Tuple[bool, dict[str, Any]]: A tuple of the export boolean and the updated model_args dictionary.
        re   Nra   r   z/**/z**/c                ^    g | ])}|                                                               *S r"   )relative_toas_posix).0pathrr   s     r   
<listcomp>z6Transformer._backend_should_export.<locals>.<listcomp>8  s3    ppp4 0 0 ; ; D D F Fpppr!   modelrJ   rI   )	repo_typerJ   rI   c                4    g | ]}t          |          |S r"   r   )r   fnameglob_patterns     r   r   z6Transformer._backend_should_export.<locals>.<listcomp>@  s(    ]]]%|@\@\]]]]r!      z	Multiple z files found in z: z, defaulting to zW. Please specify the desired file name via `model_kwargs={"file_name": "<file_name>"}`. zNo z
 found in z. Exporting the model to .z#If you intended to load one of the  zN files, please specify the desired file name via `model_kwargs={"file_name": "r   z"}`.)rk   rM   r   r   r4   globhuggingface_hublist_repo_fileslenloggerwarningparts)rF   rr   rs   r,   r   ru   rt   re   ra   r   primary_full_pathsecondary_full_pathmodel_file_names	all_filesmodel_foundfile_name_partsr   s    `              @r   rj   z"Transformer._backend_should_export  s   N $// 	&:%%NN;0@AA	NN;55	ENnDI66??AAATXYbTcTcTlTlTnTn :DDL)44==???dlI..7799 	
 @If);;)9;;;NfTdNfNf  		^ppppS\SaSabnSoSoppp'7""$$!#
D99 nnWd33	  I  ^]]]9]]]
 (+;; 		4{*<<-1AAK 4'((1,,J1N1NNNsL s s)BTBTBVBV s s\l s s  R s s s   +/,
;'*3
;'>$_F y///!##&5b&9J{#&*:>>+r+J+J&b_]`^`]`Ma&b&b&b&k&k&m&mJ{# 		NNmimmY-?-?-A-Amm^jmmm     y:J y y\ y y^nop^qy y y  
 z!!r!   c                p    d| d}|r
|d|dz  }n	|d|dz  }t                               |           d S )NzSaving the exported zA model is heavily recommended to avoid having to export it again.z# Do so with `model.save_pretrained(z)`.z Do so with `model.push_to_hub(z, create_pr=True)`.)r   r   )rF   r)   rs   rt   to_logs        r   rq   z!Transformer._backend_warn_to_savef  sc    wwww 	bU<NUUUUFFa8JaaaaFvr!   c                N    ddl m} dg|_         |j        |f||d|| _        dS )Loads the encoder model from T5r   )T5EncoderModel	decoder.*rR   N)transformersr   "_keys_to_ignore_on_load_unexpectedr>   rA   )rF   r)   r7   r0   r,   r   s         r   rV   zTransformer._load_t5_modeln  sR    //////=HM98.8
'-
 
FP
 
r!   c                N    ddl m} dg|_         |j        |f||d|| _        dS )r   r   )MT5EncoderModelr   rR   N)r   r   r   r>   rA   )rF   r)   r7   r0   r,   r   s         r   rW   zTransformer._load_mt5_modelw  sR    000000>I]:9/9
'-
 
FP
 
r!   c                V    d|                                   d| j        j        j         dS )NzTransformer(z) with Transformer model: r   )get_config_dictrA   rC   rD   rF   s    r   __repr__zTransformer.__repr__  s1    ud2244uuPTP_PiPruuuur!   featuresdict[str, torch.Tensor]c                P   |d         |d         d}d|v r|d         |d<    | j         di ||ddi}|d         }t                      rddlm} t	          | j         |          rt| j         j        j        rc|                    d          }|d         }t          j	        || j         j        j
        |j        	          }	t          j        |	|fd
          |d<   ||d<   | j         j        j        r7t          |          dk    r$d}
t          |          dk     rd
}
||
         }||d<   |S )z#Returns token_embeddings, cls_token	input_idsattention_mask)r   r   token_type_idsreturn_dictFr   )PeftModelForFeatureExtraction)devicer   )dimtoken_embeddings      all_layer_embeddingsr"   )rA   r   rP   r   rU   active_peft_configis_prompt_learningsizer(   onesnum_virtual_tokensr   catr7   output_hidden_statesr   )rF   r   r   trans_featuresoutput_statesoutput_tokensr   
batch_sizer   prefix_attention_maskall_layer_idxhidden_statess               r   forwardzTransformer.forward  s   '/'<PXYiPjkkx''/78H/IN+,'VV.VFVVPUVVV%a(  	g:::::: 4?,IJJ	gO6I	g +//22
!)*:!;(-
 B U^l^s) ) )% .3Y8M~7^de-f-f-f)*'4#$?!6 	=3};M;MPQ;Q;QM=!!A%% !)-8M/<H+,r!   intc                $    | j         j        j        S )N)rA   r7   hidden_sizer   s    r   get_word_embedding_dimensionz(Transformer.get_word_embedding_dimension  s    %11r!   texts.list[str] | list[dict] | list[tuple[str, str]]padding
str | boolc           
     t   i }t          |d         t                    r|g}nt          |d         t                    rqg }g |d<   |D ]c}t          t	          |                                                    \  }}|                    |           |d                             |           d|g}nCg g }	}|D ]8}
|                    |
d                    |	                    |
d                    9||	g}d |D             }| j        rd |D             }|                     | j	        ||dd| j
        d           |S )	z-Tokenizes a text and maps tokens to token-idsr   	text_keysr   c                &    g | ]}d  |D             S )c                P    g | ]#}t          |                                          $S r"   )r   stripr   ss     r   r   z3Transformer.tokenize.<locals>.<listcomp>.<listcomp>  s&    4441A444r!   r"   r   cols     r   r   z(Transformer.tokenize.<locals>.<listcomp>  s'    LLL44444LLLr!   c                &    g | ]}d  |D             S )c                6    g | ]}|                                 S r"   )lowerr   s     r   r   z3Transformer.tokenize.<locals>.<listcomp>.<listcomp>  s     333!AGGII333r!   r"   r   s     r   r   z(Transformer.tokenize.<locals>.<listcomp>  s'    KKK33s333KKKr!   longest_firstpt)r   
truncationreturn_tensors
max_length)rU   r   rl   nextiteritemsappendr2   updater?   r*   )rF   r   r   outputto_tokenizelookuptext_keytextbatch1batch2
text_tuples              r   tokenizezTransformer.tokenize  s    eAh$$ 	+ 'KKa$'' 	+K"$F; 5 5!%d6<<>>&:&:!;!;$""4((({#**84444&-KKFF# - -
jm,,,jm,,,,!6*K MLLLL  	LKK{KKKKDN*#.  	
 	
 	
 r!   c                *      fd j         D             S )Nc                ,    i | ]}|j         |         S r"   )__dict__)r   keyrF   s     r   
<dictcomp>z/Transformer.get_config_dict.<locals>.<dictcomp>  s"    DDDCT]3'DDDr!   )r;   r   s   `r   r   zTransformer.get_config_dict  s     DDDD43CDDDDr!   output_pathsafe_serializationc                R   | j                             ||           | j                            |           t          t          j                            |d          d          5 }t          j        | 	                                |d           d d d            d S # 1 swxY w Y   d S )N)r   sentence_bert_config.jsonwr   )indent)
rA   save_pretrainedr?   rm   r   r   joinrn   dumpr   )rF   r   r   fOuts       r   savezTransformer.save  s    ''HZ'[[[&&{333"',,{,GHH#NN 	>RVId**,,d1====	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	>s   %*BB #B 
input_pathc                   dD ]C}t           j                            ||          }t           j                            |          r nDt	          |          5 }t          j        |          }d d d            n# 1 swxY w Y   d|v r%d|d         v r|d                             d           d|v r%d|d         v r|d                             d           d|v r%d|d         v r|d                             d            | dd|i|S )N)r   zsentence_roberta_config.jsonzsentence_distilbert_config.jsonzsentence_camembert_config.jsonzsentence_albert_config.jsonz sentence_xlm-roberta_config.jsonzsentence_xlnet_config.jsonr,   trust_remote_coder.   r/   r)   r"   )r   r   r   ri   rm   rn   ro   rk   )clsr  config_namesbert_config_pathfInr7   s         r   ro   zTransformer.load  sz   
 	 	K !#Z E Ew~~/00  #$$ 	$Ys^^F	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 6!!&9VL=Q&Q&Q< $$%8999v%%*=HXAY*Y*Y#$(()<===F""':f]>S'S'S=!%%&9:::s;;j;F;;;s   A77A;>A;)NNNNNFNr(   )r)   r   r*   r+   r,   r-   r.   r-   r/   r-   r0   r1   r2   r&   r3   r   r4   r   r   r   )r)   r   r0   r1   r4   r   r/   rG   )r   r   )rr   r   rs   r&   r,   rG   r   r   ru   r   rt   r   r   r   )r)   r   rs   r   rt   r   r   r   )r   r   )r   r   r   r   )r   r   )T)r   r   r   r   r   r   )r   rG   )r   r   r   r&   r   r   )r  r   r   r%   )rD   
__module____qualname____doc__r'   __annotations__r:   r<   r=   rX   rZ   rY   rj   rq   rV   rW   r   r   r   r   r   r  classmethodro   __classcell__)rC   s   @r   r%   r%      s         . L
 &*,004-1 $#&*.W .W .W .W .W .W .W`b b b b8v v v v&   5S 5S 5S 5Sn*S *S *S *SXc" c" c" c"J   
 
 
 

 
 
 
v v v v# # # #J2 2 2 2 \`& & & & &PE E E E> > > > > < < < [< < < < <r!   r%   )r   r   r   r   r   r   )
__future__r   rn   loggingr   r   pathlibr   typingr   r   r   r(   r	   r   r
   r   r   r   r   transformers.utils.import_utilsr   transformers.utils.peft_utilsr   	getLoggerrD   r   r#   Moduler%   r"   r!   r   <module>r     sI   " " " " " "   				                                        R R R R R R R R R R R R R R = = = = = = B B B B B B		8	$	$   Z< Z< Z< Z< Z<") Z< Z< Z< Z< Z<r!   