
from __future__ import annotations

import csv
import logging
import os
from contextlib import nullcontext
from typing import TYPE_CHECKING, Literal

import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics.pairwise import paired_cosine_distances, paired_euclidean_distances, paired_manhattan_distances

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.readers import InputExample
from sentence_transformers.similarity_functions import SimilarityFunction

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class EmbeddingSimilarityEvaluator(SentenceEvaluator):
    """
    Evaluate a model based on the similarity of the embeddings by calculating the Spearman and Pearson rank correlation
    in comparison to the gold standard labels.
    The metrics are the cosine similarity as well as the Euclidean and Manhattan distances.
    The returned score is the Spearman correlation for the specified similarity metric.

    Example:
        ::

            from datasets import load_dataset
            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, SimilarityFunction

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load the STSB dataset (https://huggingface.co/datasets/sentence-transformers/stsb)
            eval_dataset = load_dataset("sentence-transformers/stsb", split="validation")

            # Initialize the evaluator
            dev_evaluator = EmbeddingSimilarityEvaluator(
                sentences1=eval_dataset["sentence1"],
                sentences2=eval_dataset["sentence2"],
                scores=eval_dataset["score"],
                name="sts_dev",
            )
            results = dev_evaluator(model)
            '''
            EmbeddingSimilarityEvaluator: Evaluating the model on the sts_dev dataset:
            Cosine-Similarity :  Pearson: 0.8806 Spearman: 0.8810
            '''
            print(dev_evaluator.primary_metric)
            # => "sts_dev_pearson_cosine"
            print(results[dev_evaluator.primary_metric])
            # => 0.881019449484294
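            # Illustrative only: the same evaluator can also score several similarity
            # functions at once and truncate embeddings before scoring; the values below
            # are example settings, not recommendations.
            truncated_evaluator = EmbeddingSimilarityEvaluator(
                sentences1=eval_dataset["sentence1"],
                sentences2=eval_dataset["sentence2"],
                scores=eval_dataset["score"],
                name="sts_dev_256",
                similarity_fn_names=["cosine", "euclidean"],
                truncate_dim=256,
            )
            truncated_evaluator(model)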
       N FT
sentences1	list[str]
sentences2scoreslist[float]
batch_sizeintmain_similaritystr | SimilarityFunction | Nonesimilarity_fn_names?list[Literal['cosine', 'euclidean', 'manhattan', 'dot']] | Nonenamestrshow_progress_barbool	write_csv	precision?Literal['float32', 'int8', 'uint8', 'binary', 'ubinary'] | Nonetruncate_dim
int | Nonec                   t                                                       || _        || _        || _        |	| _        |
| _        || _        t          | j                  t          | j                  k    sJ t          | j                  t          | j                  k    sJ |rt          |          nd| _
        |pg | _        || _        || _        |Nt                                          t           j        k    p&t                                          t           j        k    }|| _        d|rd|z   ndz   |
rd|
z   ndz   dz   | _        ddg| _        |                     | j                   dS )a-  
        Constructs an evaluator for the dataset.

        Args:
            sentences1 (List[str]): List with the first sentence in a pair.
            sentences2 (List[str]): List with the second sentence in a pair.
            scores (List[float]): Similarity score between sentences1[i] and sentences2[i].
            batch_size (int, optional): The batch size for processing the sentences. Defaults to 16.
            main_similarity (Optional[Union[str, SimilarityFunction]], optional): The main similarity function to use.
                Can be a string (e.g. "cosine", "dot") or a SimilarityFunction object. Defaults to None.
            similarity_fn_names (List[str], optional): List of similarity function names to use. If None, the
                ``similarity_fn_name`` attribute of the model is used. Defaults to None.
            name (str, optional): The name of the evaluator. Defaults to "".
            show_progress_bar (bool, optional): Whether to show a progress bar during evaluation. Defaults to False.
            write_csv (bool, optional): Whether to write the evaluation results to a CSV file. Defaults to True.
            precision (Optional[Literal["float32", "int8", "uint8", "binary", "ubinary"]], optional): The precision
                to use for the embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". Defaults to None.
            truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the
                model's current truncation dimension. Defaults to None.
        """
        super().__init__()
        self.sentences1 = sentences1
        self.sentences2 = sentences2
        self.scores = scores
        self.write_csv = write_csv
        self.precision = precision
        self.truncate_dim = truncate_dim

        assert len(self.sentences1) == len(self.sentences2)
        assert len(self.sentences1) == len(self.scores)

        self.main_similarity = SimilarityFunction(main_similarity) if main_similarity else None
        self.similarity_fn_names = similarity_fn_names or []
        self.name = name
        self.batch_size = batch_size
        if show_progress_bar is None:
            show_progress_bar = (
                logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG
            )
        self.show_progress_bar = show_progress_bar

        self.csv_file = (
            "similarity_evaluation"
            + ("_" + name if name else "")
            + ("_" + precision if precision else "")
            + "_results.csv"
        )
        self.csv_headers = ["epoch", "steps"]
        self._append_csv_headers(self.similarity_fn_names)

    def _append_csv_headers(self, similarity_fn_names: list[str]) -> None:
        metrics = ["pearson", "spearman"]

        for v in similarity_fn_names:
            for m in metrics:
                self.csv_headers.append(f"{v}_{m}")

    @classmethod
    def from_input_examples(cls, examples: list[InputExample], **kwargs):
        sentences1 = []
        sentences2 = []
        scores = []

        for example in examples:
            sentences1.append(example.texts[0])
            sentences2.append(example.texts[1])
            scores.append(example.label)
        return cls(sentences1, sentences2, scores, **kwargs)

    def __call__(
        self, model: SentenceTransformer, output_path: str = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"EmbeddingSimilarityEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        with nullcontext() if self.truncate_dim is None else model.truncate_sentence_embeddings(self.truncate_dim):
            embeddings1 = model.encode(
                self.sentences1,
                batch_size=self.batch_size,
                show_progress_bar=self.show_progress_bar,
                convert_to_numpy=True,
                precision=self.precision,
                normalize_embeddings=bool(self.precision),
            )
            embeddings2 = model.encode(
                self.sentences2,
                batch_size=self.batch_size,
                show_progress_bar=self.show_progress_bar,
                convert_to_numpy=True,
                precision=self.precision,
                normalize_embeddings=bool(self.precision),
            )
        # Binary and ubinary embeddings are packed, so we need to unpack them for the distance metrics
        if self.precision == "binary":
            embeddings1 = (embeddings1 + 128).astype(np.uint8)
            embeddings2 = (embeddings2 + 128).astype(np.uint8)
        if self.precision in ("ubinary", "binary"):
            embeddings1 = np.unpackbits(embeddings1, axis=1)
            embeddings2 = np.unpackbits(embeddings2, axis=1)

        labels = self.scores

        if not self.similarity_fn_names:
            self.similarity_fn_names = [model.similarity_fn_name]
            self._append_csv_headers(self.similarity_fn_names)

        # Higher is better for every score, so the distance-based metrics are negated.
        similarity_functions = {
            "cosine": lambda x, y: 1 - paired_cosine_distances(x, y),
            "manhattan": lambda x, y: -paired_manhattan_distances(x, y),
            "euclidean": lambda x, y: -paired_euclidean_distances(x, y),
            "dot": lambda x, y: [np.dot(emb1, emb2) for emb1, emb2 in zip(x, y)],
        }

        metrics = {}
        for fn_name in self.similarity_fn_names:
            if fn_name in similarity_functions:
                scores = similarity_functions[fn_name](embeddings1, embeddings2)
                eval_pearson, _ = pearsonr(labels, scores)
                eval_spearman, _ = spearmanr(labels, scores)
                metrics[f"pearson_{fn_name}"] = eval_pearson
                metrics[f"spearman_{fn_name}"] = eval_spearman
                logger.info(
                    f"{fn_name.capitalize()}-Similarity :\tPearson: {eval_pearson:.4f}\tSpearman: {eval_spearman:.4f}"
                )

        # Optionally append the results to a CSV file.
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)

                writer.writerow(
                    [epoch, steps]
                    + [
                        metrics[f"{metric}_{fn_name}"]
                        for fn_name in self.similarity_fn_names
                        for metric in ["pearson", "spearman"]
                    ]
                )

        if len(self.similarity_fn_names) > 1:
            metrics["pearson_max"] = max(metrics[f"pearson_{fn_name}"] for fn_name in self.similarity_fn_names)
            metrics["spearman_max"] = max(metrics[f"spearman_{fn_name}"] for fn_name in self.similarity_fn_names)

        if self.main_similarity:
            self.primary_metric = {
                SimilarityFunction.COSINE: "spearman_cosine",
                SimilarityFunction.EUCLIDEAN: "spearman_euclidean",
                SimilarityFunction.MANHATTAN: "spearman_manhattan",
                SimilarityFunction.DOT_PRODUCT: "spearman_dot",
            }.get(self.main_similarity)
        else:
            if len(self.similarity_fn_names) > 1:
                self.primary_metric = "spearman_max"
            else:
                self.primary_metric = f"spearman_{self.similarity_fn_names[0]}"

        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics)
        return metrics

    @property
    def description(self) -> str:
        return "Semantic Similarity"
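

if __name__ == "__main__":
    # Minimal illustrative self-check, not part of the published module: it exercises the
    # ``from_input_examples`` constructor on a few hand-written pairs. The "all-MiniLM-L6-v2"
    # checkpoint is an assumption; any SentenceTransformer model works.
    from sentence_transformers import SentenceTransformer

    demo_examples = [
        InputExample(texts=["A man is eating food.", "A man is eating a meal."], label=0.9),
        InputExample(texts=["A man is eating food.", "The girl is carrying a baby."], label=0.05),
        InputExample(texts=["A plane is taking off.", "An air plane is taking off."], label=0.95),
        InputExample(texts=["A plane is taking off.", "A man is playing a flute."], label=0.0),
    ]
    demo_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(demo_examples, name="demo")
    demo_model = SentenceTransformer("all-MiniLM-L6-v2")
    print(demo_evaluator(demo_model))  # e.g. {"demo_pearson_cosine": ..., "demo_spearman_cosine": ...}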