
    Ng                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z
d dlmZ erd dlmZ  ej        e          Z G d de          ZdS )	    )annotationsN)nullcontext)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                  R     e Zd ZdZ	 	 	 	 dd fdZ	 d d!dZed"d            Z xZS )#MSEEvaluatorFromDataFrameu  
    Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

    Args:
        dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
            Columns are the respective language codes::

            [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
             {'en': 'My second sentence', ...}]
        teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
        combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
            First entry in a tuple is the source language. The sentence in the respective language will be fetched from
            the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
            will be fetched from the dataframe and passed to the student model
        batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
        name (str, optional): The name of the evaluator. Defaults to "".
        write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
            current truncation dimension. Defaults to None.
        TN	dataframelist[dict[str, str]]teacher_modelr   combinationslist[tuple[str, str]]
batch_sizeintnamestr	write_csvbooltruncate_dim
int | Nonec                   t                                                       || _        || _        || _        |rd|z   }d|z   dz   | _        ddg| _        d| _        || _        || _	        i | _
        t                              d           t                      }| j        D ]\  }	}
g }g }|D ]}||	                                         dk    ro||
                                         dk    rQ|                    ||	                    |                    ||	                    |                    ||
                    ||f| j
        |	|
f<   | j                            |	 d	|
            t#          |          }| j	        t%                      n|                    | j	                  5  |                    || j        
          }d d d            n# 1 swxY w Y   d t+          ||          D             | _        d S )N_mse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddingsr   -r   c                    i | ]\  }}||	S  r"   ).0sentembs      v/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.py
<dictcomp>z6MSEEvaluatorFromDataFrame.__init__.<locals>.<dictcomp>X   s    "l"l"ls4"l"l"l    )super__init__r   r   r   csv_filecsv_headersprimary_metricr   r   dataloggerinfosetstripaddappendlistr   truncate_sentence_embeddingsencodezipteacher_embeddings)selfr   r   r   r   r   r   r   all_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowall_src_embeddings	__class__s                  r&   r*   z"MSEEvaluatorFromDataFrame.__init__)   st    	(	$ 	:D(4/.@#W-,"(	0111"uu"&"3 	> 	>HhMM  8 8x=&&((B..3x=3F3F3H3HB3N3N(,,S];;;!((X777!((X777/<m.LDIx*+##x$<$<($<$<====#$899  ( MMM;;D<MNN	h 	h
 "/!5!56JW[Wf!5!g!g	h 	h 	h 	h 	h 	h 	h 	h 	h 	h 	h 	h 	h 	h 	h #m"lcBVXj>k>k"l"l"ls   &GGGmodeloutput_pathr   r   returndict[str, float]c           
         |                                  g } j        D ](\  }} j        ||f         \  }}	t          j         fd|D                       }
 j        t                      n|                     j                  5  t          j        |                    |	 j	                            }d d d            n# 1 swxY w Y   |
|z
  dz  
                                }|dz  }|                    |           t                              d j         d| d| d           t                              d	|d
           *|ĉ j        rt           j                            | j                  }t           j                            |          }t+          |d|rdndd          5 }t-          j        |          }|s|                     j                   |                    ||g|z              d d d            n# 1 swxY w Y   dt          j
        |                                           i}                     | j                  }                     ||           |S )Nc                *    g | ]}j         |         S r"   )r9   )r#   r$   r:   s     r&   
<listcomp>z6MSEEvaluatorFromDataFrame.__call__.<locals>.<listcomp>c   s!    (a(a(a4)@)F(a(a(ar(   r       d   zMSE evaluation on z dataset - r   :zMSE (*100):	4fr   awzutf-8)newlinemodeencodingr   )evalr   r.   npasarrayr   r   r6   r7   r   meanr4   r/   r0   r   r   ospathjoinr+   isfileopencsvwriterwriterowr,   itemprefix_name_to_metrics store_metrics_in_model_card_data)r:   rD   rE   r   r   
mse_scoresr<   r=   r>   r?   src_embeddingstrg_embeddingsmsecsv_pathoutput_file_existsfr^   metricss   `                 r&   __call__z"MSEEvaluatorFromDataFrame.__call__Z   s    	


"&"3 	2 	2Hh+/9h5I+J(M=Z(a(a(a(aS`(a(a(abbN"&"3";AcAcdhduAvAv e e!#ELLSWSbL,c,c!d!de e e e e e e e e e e e e e e #^39??AAC3JCc"""KKYTYYY8YYhYYYZZZKK00001111"t~"w||K??H!#!9!9h8J1SPS^efff =jkA) 6OOD$4555 ;<<<= = = = = = = = = = = = = = = "BGJ$7$7$<$<$>$>#>?--gtyAA--eW===s%   /CC	C	-AHHHc                    dS )NzKnowledge Distillationr"   )r:   s    r&   descriptionz%MSEEvaluatorFromDataFrame.description~   s    ''r(   )r
   r   TN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )NrC   rC   )
rD   r   rE   r   r   r   r   r   rF   rG   )rF   r   )	__name__
__module____qualname____doc__r*   rk   propertyrm   __classcell__)rB   s   @r&   r	   r	      s         4 #'/m /m /m /m /m /m /md bd" " " " "H ( ( ( X( ( ( ( (r(   r	   )
__future__r   r]   loggingrX   
contextlibr   typingr   numpyrU   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerrn   r/   r	   r"   r(   r&   <module>r|      s    " " " " " " 



  				 " " " " " "                 P P P P P P NMMMMMM		8	$	$m( m( m( m( m( 1 m( m( m( m( m(r(   