
from __future__ import annotations

import csv
import logging
import os
from contextlib import nullcontext
from typing import TYPE_CHECKING

import numpy as np
import torch

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.util import pytorch_cos_sim

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class TranslationEvaluator(SentenceEvaluator):
    """
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
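
            # Illustrative variant (values chosen for demonstration): print mismatched
            # pairs and evaluate embeddings truncated to 256 dimensions, which is mainly
            # useful for Matryoshka-style models. Both parameters are optional.
            verbose_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl-256dim",
                print_wrong_matches=True,
                truncate_dim=256,
            )
            results_256 = verbose_evaluator(model)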
    """

    def __init__(
        self,
        source_sentences: list[str],
        target_sentences: list[str],
        show_progress_bar: bool = False,
        batch_size: int = 16,
        name: str = "",
        print_wrong_matches: bool = False,
        write_csv: bool = True,
        truncate_dim: int | None = None,
    ):
        """
        Constructs an evaluator for the given parallel sentence dataset.

        The two lists must be aligned, i.e. target_sentences[i] is expected to be the translation of source_sentences[i].

        Args:
            source_sentences (List[str]): List of sentences in the source language.
            target_sentences (List[str]): List of sentences in the target language.
            show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
            batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
            name (str): The name of the evaluator. Defaults to an empty string.
            print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
                current truncation dimension will be used. Defaults to None.
        _translation_evaluationz_results.csv)epochstepssrc2trgtrg2srcmean_accuracyN)super__init__r   r   r   r   r   r   r   lencsv_filecsv_headersr   primary_metric)
selfr   r   r   r   r   r   r   r   	__class__s
            q/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/evaluation/TranslationEvaluator.pyr#   zTranslationEvaluator.__init__:   s    6 	 0 0	$!2#6 (4())S1F-G-GGGGG 	:D047.HCCC"-    modelr   output_pathr   r   returndict[str, float]c           	        |dk    r|dk    rd| }nd| d| d}nd}| j         |d| j          dz  }t                              d	| j         d
| d           | j         t	                      n|                    | j                   5  t          j        |                    | j	        | j
        | j        d                    }t          j        |                    | j        | j
        | j        d                    }d d d            n# 1 swxY w Y   t          ||                                                                                                          }d}	d}
t#          t%          |                    D ]}t'          j        ||                   }||k    r|	dz  }	)| j        rt-          d|d|d|           t-          d| j	        |                    t-          d| j        |         d||         |         dd           t-          d| j        |         d||         |         dd           t/          ||                   }t1          |d d          }|d d         D ]'\  }}t-          d|d|dd| j        |                    (|j        }t#          t%          |                    D ]'}t'          j        ||                   }||k    r|
dz  }
(|	t%          |          z  }|
t%          |          z  }t                              d|dz  d           t                              d |dz  d           || j        rt6          j                            || j                  }t6          j                            |          }tA          |d|rd!nd"d#$          5 }tC          j"        |          }|s|#                    | j$                   |#                    ||||g           d d d            n# 1 swxY w Y   ||||z   d%z  d&}| %                    || j                  }| &                    ||           |S )'Nr,   z after epoch z
 in epoch z after z stepsr   z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:F)r   r   convert_to_numpyr      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                    | d         S )Nr5    )xs    r*   <lambda>z/TranslationEvaluator.__call__.<locals>.<lambda>   s
    ! r+   T)keyreverse   	zAccuracy src2trg: d   z.2fzAccuracy trg2src: awzutf-8)newlinemodeencoding   )src2trg_accuracytrg2src_accuracyr!   )'r   loggerinfor   r   truncate_sentence_embeddingstorchstackencoder   r   r   r   r   detachcpunumpyranger$   npargmaxr   print	enumeratesortedTr   ospathjoinr%   isfileopencsvwriterwriterowr&   prefix_name_to_metrics store_metrics_in_model_card_data)r(   r-   r.   r   r   out_txtembeddings1embeddings2cos_simscorrect_src2trgcorrect_trg2srcimax_idxresultsidxscoreacc_src2trgacc_trg2srccsv_pathoutput_file_existsfr]   metricss                          r*   __call__zTranslationEvaluator.__call__h   s2    B;;{{1%11BuBBUBBBG(=):====GqTXT]qqgnqqqrrr"/7[]]]U=_=_`d`q=r=r 	 	+)&*&<#%*	    K  +)&*&<#%*	    K	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$ #;<<CCEEIIKKQQSSs8}}%% 	Z 	ZAi,,GG||1$) 	Z-q2MwXkmnooond&;A&>???nd&;G&DFlQYZ[Q\]dQeFlFlFlFlmmmnd&;A&>@`8TU;WX>@`@`@`@`aaa#HQK00 nndKKK")"1"+ Z ZJC$%<%<%<%<%<d>STW>XYYYY:s8}}%% 	% 	%Ai,,GG||1$%H5%H5@s):@@@AAA@s):@@@AAA"t~"w||K??H!#!9!9h8J1SPS^efff JjkA) 6OOD$4555{K HIIIJ J J J J J J J J J J J J J J !, +)K71<
 

 --gtyAA--eW===s&   	A5D

DD%A
O;;O?O?)Fr   r   FTN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )Nr,   r,   )
r-   r   r.   r   r   r   r   r   r/   r0   )__name__
__module____qualname____doc__r#   rr   __classcell__)r)   s   @r*   r
   r
      s        " "P #($)#',. ,. ,. ,. ,. ,. ,.^ bdS S S S S S S S Sr+   r
   )
__future__r   r\   loggingrW   
contextlibr   typingr   rO   rQ   rJ   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr   )sentence_transformers.SentenceTransformerr   	getLoggerrs   rG   r
   r7   r+   r*   <module>r      s    " " " " " " 



  				 " " " " " "                  P P P P P P 6 6 6 6 6 6 NMMMMMM		8	$	$f f f f f, f f f f fr+   