
    Ng                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 erd dlmZ  ej        e          Z G d de
          ZdS )	    )annotationsN)nullcontext)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                  T     e Zd ZdZ	 	 	 	 	 	 dd fdZdddZed d            Z xZS )!MSEEvaluatora
  
    Computes the mean squared error (x100) between the computed sentence embedding
    and some target sentence embedding.

    The MSE is computed between ||teacher.encode(source_sentences) - student.encode(target_sentences)||.

    For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
    and target_sentences are in a different language like German, Chinese, Spanish...

    Args:
        source_sentences (List[str]): Source sentences to embed with the teacher model.
        target_sentences (List[str]): Target sentences to embed with the student model.
        teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
            dimension. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import MSEEvaluator
            from datasets import load_dataset

            # Load a model
            student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
            teacher_model = SentenceTransformer('all-mpnet-base-v2')

            # Load any dataset with some texts
            dataset = load_dataset("sentence-transformers/stsb", split="validation")
            sentences = dataset["sentence1"] + dataset["sentence2"]

            # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
            mse_evaluator = MSEEvaluator(
                source_sentences=sentences,
                target_sentences=sentences,
                teacher_model=teacher_model,
                name="stsb-dev",
            )
            results = mse_evaluator(student_model)
            '''
            MSE evaluation (lower = better) on the stsb-dev dataset:
            MSE (*100):  0.805045
            '''
            print(mse_evaluator.primary_metric)
            # => "stsb-dev_negative_mse"
            print(results[mse_evaluator.primary_metric])
            # => -0.8050452917814255
    NF     Tsource_sentences	list[str]target_sentencesshow_progress_barbool
batch_sizeintnamestr	write_csvtruncate_dim
int | Nonec	                   t                                                       || _        | j        t                      n|                    | j                  5  |                    |||d          | _        d d d            n# 1 swxY w Y   || _        || _        || _	        || _
        d|z   dz   | _        g d| _        || _        d| _        d S )NTr   r   convert_to_numpymse_evaluation_z_results.csv)epochstepsMSEnegative_mse)super__init__r   r   truncate_sentence_embeddingsencodesource_embeddingsr   r   r   r   csv_filecsv_headersr   primary_metric)
selfr   r   teacher_modelr   r   r   r   r   	__class__s
            i/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/evaluation/MSEEvaluator.pyr!   zMSEEvaluator.__init__G   s$    	(  ( MMM;;D<MNN	 	
 &3%9%9 4ER\os &: & &D"	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !1!2$	)D0>A444",s   BB
Bmodelr   output_pathreturndict[str, float]c                   |dk    r|dk    rd| }nd| d| d}nd}| j         |d| j          dz  }| j         t                      n|                    | j                   5  |                    | j        | j        | j        d	
          }d d d            n# 1 swxY w Y   | j        |z
  dz                                  }|dz  }t          
                    d| j         d| d           t          
                    d|d           || j        rt          j                            || j                  }t          j                            |          }	t%          |d|	rdndd          5 }
t'          j        |
          }|	s|                    | j                   |                    |||g           d d d            n# 1 swxY w Y   d| i}|                     || j                  }|                     ||           |S )Nr,   z after epoch z
 in epoch z after z stepsr   z (truncated to )Tr      d   z'MSE evaluation (lower = better) on the z dataset:zMSE (*100):	4fawzutf-8)newlinemodeencodingr   )r   r   r"   r#   r   r   r   r$   meanloggerinfor   r   ospathjoinr%   isfileopencsvwriterwriterowr&   prefix_name_to_metrics store_metrics_in_model_card_data)r(   r-   r.   r   r   out_txttarget_embeddingsmsecsv_pathoutput_file_existsfrE   metricss                r+   __call__zMSEEvaluator.__call__g   s   B;;{{1%11BuBBUBBBG(=):====G"/7[]]]U=_=_`d`q=r=r 	 	 %%"&"8?!%	 !- ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 &)::q@FFHHs
[di[[QX[[[\\\,C,,,---"t~"w||K??H!#!9!9h8J1SPS^efff 5jkA) 6OOD$4555s 34445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 "C4(--gtyAA--eW===s%   #)BBB/A	GGGc                    dS )NzKnowledge Distillation )r(   s    r+   descriptionzMSEEvaluator.description   s    ''    )NFr
   r   TN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )Nr,   r,   )r-   r   r.   r   r/   r0   )r/   r   )	__name__
__module____qualname____doc__r!   rP   propertyrS   __classcell__)r*   s   @r+   r	   r	      s        3 3r "'#'- - - - - - -@' ' ' ' 'R ( ( ( X( ( ( ( (rT   r	   )
__future__r   rD   loggingr?   
contextlibr   typingr   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerrU   r=   r	   rR   rT   r+   <module>rb      s    " " " " " " 



  				 " " " " " "             P P P P P P NMMMMMM		8	$	$A( A( A( A( A($ A( A( A( A( A(rT   