
    Ng0                        d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z d dl	mZ d dlmZ d dlmZ d dlmZmZmZ dd	lmZmZ  ej        e          Z G d
 dej                  ZdS )    )annotationsN)	load_file)	save_file)nn)tqdm)fullnamehttp_getimport_from_string   )WhitespaceTokenizerWordTokenizerc                      e Zd Z	 	 ddd	Zd
 Zd dZd!dZd"d#dZd Ze	d$d            Z
e	dd e            dfd%d            ZdS )&WordEmbeddingsF@B 	tokenizerr   update_embeddingsboolmax_seq_lengthintc                   t           j                            |            t          |t                    rt          j        |          }t          |t
          j                  rt          j	        |          }|
                                \  }}|| _        t          j        ||          | _        | j                            d|i           || j        j        _        || _        || _        || _        d S )Nweight)r   Module__init__
isinstancelistnpasarrayndarraytorch
from_numpysizeembeddings_dimension	Embedding	emb_layerload_state_dictr   requires_gradr   r   r   )selfr   embedding_weightsr   r   num_embeddingsr"   s          g/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/models/WordEmbeddings.pyr   zWordEmbeddings.__init__   s     		4   '.. 	> "
+< = ='44 	D % 01B C C/@/E/E/G/G,,$8!n6JKK&&2C'DEEE.?+"!2,    c                ~    |                      |d                   }d }|                    |||d         d           |S )N	input_idsattention_mask)token_embeddingscls_token_embeddingsr.   )r$   update)r'   featuresr/   
cls_tokenss       r*   forwardzWordEmbeddings.forward.   sT    >>(;*?@@
$4(2"*+;"< 	
 	
 	
 r+   texts	list[str]c                     fd|D             }d |D             }t          |          }g }g }|D ]Y}dg|t          |          z
  z  }	|                    ||	z              |                    dgt          |          z  |	z              Zt          j        |t          j                  t          j        |t          j                  t          j        |t          j                  d}
|
S )Nc                6    g | ]} j         j        |fi S  )r   tokenize).0textkwargsr'   s     r*   
<listcomp>z+WordEmbeddings.tokenize.<locals>.<listcomp>;   s1    UUUt24>24BB6BBUUUr+   c                ,    g | ]}t          |          S r9   )len)r;   tokenss     r*   r>   z+WordEmbeddings.tokenize.<locals>.<listcomp><   s    FFFFCKKFFFr+   r   r   )dtype)r-   r.   sentence_lengths)maxr@   appendr   tensorlong)r'   r5   r=   tokenized_textsrC   max_lenr-   attention_masksrA   paddingoutputs   ` `        r*   r:   zWordEmbeddings.tokenize:   s   UUUUUuUUUFFoFFF&''	% 	@ 	@FcWs6{{23GVg-...""A3V#4w#>???? iuzBBB#l?%*MMM %-=UZ P P P
 
 r+   returnc                    | j         S )N)r"   r'   s    r*   get_word_embedding_dimensionz+WordEmbeddings.get_word_embedding_dimensionN   s    ((r+   Toutput_pathstrsafe_serializationc                (   t          t          j                            |d          d          5 }t	          j        |                                 |d           d d d            n# 1 swxY w Y   |rAt          |                                 t          j                            |d                     nEt          j
        |                                 t          j                            |d                     | j        
                    |           d S )Nwordembedding_config.jsonw   )indentmodel.safetensorspytorch_model.bin)openospathjoinjsondumpget_config_dictsave_safetensors_file
state_dictr   saver   )r'   rQ   rS   fOuts       r*   rd   zWordEmbeddings.saveQ   s   "',,{,GHH#NN 	>RVId**,,d1====	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	>  	Z!$//"3"3RW\\+Ob5c5cddddJt(("',,{DW*X*XYYYK(((((s   *A%%A),A)c                F    t          | j                  | j        | j        dS )N)tokenizer_classr   r   )r   r   r   r   rO   s    r*   ra   zWordEmbeddings.get_config_dict[   s*    '77!%!7"1
 
 	
r+   
input_pathc                   t          t          j                            | d                    5 }t	          j        |          }d d d            n# 1 swxY w Y   t          |d                   }|                    |           }t          j                            t          j                            | d                    r.t          t          j                            | d                    }nGt          j        t          j                            | d          t          j
        d          d          }|d         }t          |||d	         
          }|S )NrU   rg   rY   rZ   cpuT)map_locationweights_onlyzemb_layer.weightr   r   r(   r   )r[   r\   r]   r^   r_   loadr
   existsload_safetensors_filer   devicer   )rh   fInconfigrg   r   weightsr(   models           r*   rn   zWordEmbeddings.loadb   sa   "',,z+FGGHH 	$CYs^^F	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ -V4E-FGG#((44	7>>"',,z3FGGHH 	+BGLLEX,Y,YZZGGjZ)<==ELY^L_L_nr  G $$673DX^_rXs
 
 
 s   AAA Nembeddings_file_pathitem_separatormax_vocab_sizec                   t                               d|             t          j                            |           sLt                               |  d           d| v sd| v rt          d|            d| z   }t          ||            d }g }g }|                     d          rt          j	        | dd	
          nt          | d	
          5 }	t          |	dd          }
|
D ]6}|                                                    |          }|st          |          dk    r@|d         }|Nt          |          dz
  }|                    d           |                    t          j        |                     t          |          dz
  |k    rt                               d           t          j        d |dd          D                       }|                    |           |                    |           ||dk    rt          |          |k    r n8t          j        |          }|                    |           t+          |||          cd d d            S # 1 swxY w Y   d S )NzRead in embeddings file z, does not exist, try to download from server/\zEmbeddings file not found: zAhttps://public.ukp.informatik.tu-darmstadt.de/reimers/embeddings/z.gzrtutf8)encodingzLoad Word Embeddings
Embeddings)descunitrW   r   r   PADDING_TOKENz\ERROR: A line in the embeddings file had more or less  dimensions than expected. Skip token.c                ,    g | ]}t          |          S r9   )float)r;   nums     r*   r>   z1WordEmbeddings.from_text_file.<locals>.<listcomp>   s    "C"C"C#5::"C"C"Cr+   rm   )loggerinfor\   r]   ro   
ValueErrorr	   endswithgzipr[   r   rstripsplitr@   rE   r   zeroserrorarrayr   	set_vocabr   )rw   r   rx   r   ry   urlr"   vocab
embeddingsrr   iteratorliner   wordvectors                  r*   from_text_filezWordEmbeddings.from_text_fileu   s    	E/CEEFFFw~~233 	0KK/]]]^^^***d6J.J.J !U?S!U!UVVVUXllCS.///#
 $,,U33=DI*D6BBBB*V<<<'	 ADC&<<PPPH   ++N;; UqQx'/+.u::>(LL111%%bh/C&D&DEEE JJN)* * LLv   "C"Cqrr"C"C"CDD!!&)))T"""!-.12D2DUVdIdIdEJ//J&&&!#zUf  K'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	s   FI  I$'I$)Fr   )r   r   r   r   r   r   )r5   r6   )rM   r   )T)rQ   rR   rS   r   )rh   rR   )rw   rR   r   r   rx   rR   ry   r   )__name__
__module____qualname__r   r4   r:   rP   rd   ra   staticmethodrn   r   r   r9   r+   r*   r   r      s        
 #(%- - - - -.
 
 
   () ) ) )) ) ) ) )
 
 
    \$  #(!%%''"= = = = \= = =r+   r   )
__future__r   r   r_   loggingr\   numpyr   r   safetensors.torchr   rp   r   rb   r   r   sentence_transformers.utilr   r	   r
   r   r   r   	getLoggerr   r   r   r   r9   r+   r*   <module>r      s    " " " " " "    				      @ @ @ @ @ @ @ @ @ @ @ @             M M M M M M M M M M 9 9 9 9 9 9 9 9		8	$	$] ] ] ] ]RY ] ] ] ] ]r+   