
    Ng|                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ  ej        e	          Z
 G d dej                  ZdS )    )annotationsN)Tensornnc                  P     e Zd ZdZdd fd	ZddZd Zd Zed             Z	 xZ
S )WordWeightszDThis model can weight word embeddings, for example, with idf-values.   vocab	list[str]word_weightsdict[str, float]unknown_word_weightfloatc                x   t                                                       g d| _        || _        || _        || _        g }d}|D ]\}|}||v r	||         }n6|                                |v r||                                         }n|dz  }|                    |           ]t          	                    | dt          |           d|            t          j        t          |          d          | _        | j                            dt          j        |                              d          i           dS )aZ  
        Initializes the WordWeights class.

        Args:
            vocab (List[str]): Vocabulary of the tokenizer.
            word_weights (Dict[str, float]): Mapping of tokens to a float weight value. Word embeddings are multiplied
                by this float value. Tokens in word_weights must not be equal to the vocab (can contain more or less values).
            unknown_word_weight (float, optional): Weight for words in vocab that do not appear in the word_weights lookup.
                These can be, for example, rare words in the vocab where no weight exists. Defaults to 1.
        )r	   r   r   r   r   z of z0 words without a weighting value. Set weight to weightN)super__init__config_keysr	   r   r   lowerappendloggerinfolenr   	Embedding	emb_layerload_state_dicttorchFloatTensor	unsqueeze)	selfr	   r   r   weightsnum_unknown_wordswordr   	__class__s	           d/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/models/WordWeights.pyr   zWordWeights.__init__   sI    	KKK
(#6  	# 	#D(F|##%d+--%djjll3!Q&!NN6"""" wwc%jjwwbuww	
 	
 	
 c%jj!44&&%2CG2L2L2V2VWX2Y2Y'Z[[[[[    featuresdict[str, Tensor]c                   |d         }|d         }|                      |d                                       d          }||                                z  }t          j        |d          }|                    d                              |                                          }||z  }|                    ||d           |S )Nattention_masktoken_embeddings	input_idsr   )r*   token_weights_sum)	r   squeezer   r   sumr   expandsizeupdate)r   r&   r)   r*   token_weights_rawtoken_weightsr-   token_weights_expandeds           r$   forwardzWordWeights.forward4   s    !"23#$67 !NN8K+@AAII"MM)N,@,@,B,BB!ImQ77 "/!8!8!<!<!C!CDTDYDYD[D[!\!\+.DD-=Teffgggr%   c                *      fd j         D             S )Nc                ,    i | ]}|j         |         S  )__dict__).0keyr   s     r$   
<dictcomp>z/WordWeights.get_config_dict.<locals>.<dictcomp>E   s"    DDDCT]3'DDDr%   )r   )r   s   `r$   get_config_dictzWordWeights.get_config_dictD   s     DDDD43CDDDDr%   c                    t          t          j                            |d          d          5 }t	          j        |                                 |d           d d d            d S # 1 swxY w Y   d S )Nconfig.jsonw   )indent)openospathjoinjsondumpr>   )r   output_pathfOuts      r$   savezWordWeights.saveG   s    "',,{M::C@@ 	>DId**,,d1====	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	>s   *A&&A*-A*c                    t          t          j                            | d                    5 }t	          j        |          }d d d            n# 1 swxY w Y   t          di |S )Nr@   r9   )rD   rE   rF   rG   rH   loadr   )
input_pathfInconfigs      r$   rN   zWordWeights.loadK   s    "',,z=99:: 	$cYs^^F	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ $$V$$$s   AAA)r   )r	   r
   r   r   r   r   )r&   r'   )__name__
__module____qualname____doc__r   r6   r>   rL   staticmethodrN   __classcell__)r#   s   @r$   r   r      s        NN"\ "\ "\ "\ "\ "\ "\H    E E E> > > % % \% % % % %r%   r   )
__future__r   rH   loggingrE   r   r   r   	getLoggerrR   r   Moduler   r9   r%   r$   <module>r\      s    " " " " " "   				         		8	$	$C% C% C% C% C%") C% C% C% C% C%r%   