
from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING, Callable, Literal

import numpy as np
from torch import Tensor
from tqdm import tqdm

from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation.InformationRetrievalEvaluator import InformationRetrievalEvaluator
from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.similarity_functions import SimilarityFunction
from sentence_transformers.util import is_datasets_available

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)

DatasetNameType = Literal[
    "climatefever",
    "dbpedia",
    "fever",
    "fiqa2018",
    "hotpotqa",
    "msmarco",
    "nfcorpus",
    "nq",
    "quoraretrieval",
    "scidocs",
    "arguana",
    "scifact",
    "touche2020",
]

dataset_name_to_id = {
    "climatefever": "zeta-alpha-ai/NanoClimateFEVER",
    "dbpedia": "zeta-alpha-ai/NanoDBPedia",
    "fever": "zeta-alpha-ai/NanoFEVER",
    "fiqa2018": "zeta-alpha-ai/NanoFiQA2018",
    "hotpotqa": "zeta-alpha-ai/NanoHotpotQA",
    "msmarco": "zeta-alpha-ai/NanoMSMARCO",
    "nfcorpus": "zeta-alpha-ai/NanoNFCorpus",
    "nq": "zeta-alpha-ai/NanoNQ",
    "quoraretrieval": "zeta-alpha-ai/NanoQuoraRetrieval",
    "scidocs": "zeta-alpha-ai/NanoSCIDOCS",
    "arguana": "zeta-alpha-ai/NanoArguAna",
    "scifact": "zeta-alpha-ai/NanoSciFact",
    "touche2020": "zeta-alpha-ai/NanoTouche2020",
}

dataset_name_to_human_readable = {
    "climatefever": "ClimateFEVER",
    "dbpedia": "DBPedia",
    "fever": "FEVER",
    "fiqa2018": "FiQA2018",
    "hotpotqa": "HotpotQA",
    "msmarco": "MSMARCO",
    "nfcorpus": "NFCorpus",
    "nq": "NQ",
    "quoraretrieval": "QuoraRetrieval",
    "scidocs": "SCIDOCS",
    "arguana": "ArguAna",
    "scifact": "SciFact",
    "touche2020": "Touche2020",
}


class NanoBEIREvaluator(SentenceEvaluator):
    """
    This class evaluates the performance of a SentenceTransformer Model on the NanoBEIR collection of datasets.

    The collection is a set of datasets based on the BEIR collection, but with a significantly smaller size, so it can be used for quickly evaluating the retrieval performance of a model before committing to a full evaluation.
    The datasets are available on HuggingFace at https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6
    The evaluator returns the same metrics as the InformationRetrievalEvaluator (i.e., MRR, nDCG, Recall@k) for each dataset and on average.


    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import NanoBEIREvaluator

            model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

            datasets = ["QuoraRetrieval", "MSMARCO"]
            query_prompts = {
                "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\nQuery: ",
                "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: "
            }

            evaluator = NanoBEIREvaluator(
                dataset_names=datasets,
                query_prompts=query_prompts,
            )

            results = evaluator(model)
            '''
            NanoBEIR Evaluation of the model on ['QuoraRetrieval', 'MSMARCO'] dataset:
            Evaluating NanoQuoraRetrieval
            Information Retrieval Evaluation of the model on the NanoQuoraRetrieval dataset:
            Queries: 50
            Corpus: 5046

            Score-Function: cosine
            Accuracy@1: 92.00%
            Accuracy@3: 98.00%
            Accuracy@5: 100.00%
            Accuracy@10: 100.00%
            Precision@1: 92.00%
            Precision@3: 40.67%
            Precision@5: 26.00%
            Precision@10: 14.00%
            Recall@1: 81.73%
            Recall@3: 94.20%
            Recall@5: 97.93%
            Recall@10: 100.00%
            MRR@10: 0.9540
            NDCG@10: 0.9597
            MAP@100: 0.9395

            Evaluating NanoMSMARCO
            Information Retrieval Evaluation of the model on the NanoMSMARCO dataset:
            Queries: 50
            Corpus: 5043

            Score-Function: cosine
            Accuracy@1: 40.00%
            Accuracy@3: 74.00%
            Accuracy@5: 78.00%
            Accuracy@10: 88.00%
            Precision@1: 40.00%
            Precision@3: 24.67%
            Precision@5: 15.60%
            Precision@10: 8.80%
            Recall@1: 40.00%
            Recall@3: 74.00%
            Recall@5: 78.00%
            Recall@10: 88.00%
            MRR@10: 0.5849
            NDCG@10: 0.6572
            MAP@100: 0.5892
            Average Queries: 50.0
            Average Corpus: 5044.5

            Aggregated for Score Function: cosine
            Accuracy@1: 66.00%
            Accuracy@3: 86.00%
            Accuracy@5: 89.00%
            Accuracy@10: 94.00%
            Precision@1: 66.00%
            Recall@1: 60.87%
            Precision@3: 32.67%
            Recall@3: 84.10%
            Precision@5: 20.80%
            Recall@5: 87.97%
            Precision@10: 11.40%
            Recall@10: 94.00%
            MRR@10: 0.7694
            NDCG@10: 0.8085
            '''
            print(evaluator.primary_metric)
            # => "NanoBEIR_mean_cosine_ndcg@10"
            print(results[evaluator.primary_metric])
            # => 0.8084508771660436
    """

    def __init__(
        self,
        dataset_names: list[DatasetNameType] | None = None,
        mrr_at_k: list[int] = [10],
        ndcg_at_k: list[int] = [10],
        accuracy_at_k: list[int] = [1, 3, 5, 10],
        precision_recall_at_k: list[int] = [1, 3, 5, 10],
        map_at_k: list[int] = [100],
        show_progress_bar: bool = False,
        batch_size: int = 32,
        write_csv: bool = True,
        truncate_dim: int | None = None,
        score_functions: dict[str, Callable[[Tensor, Tensor], Tensor]] | None = None,
        main_score_function: str | SimilarityFunction | None = None,
        aggregate_fn: Callable[[list[float]], float] = np.mean,
        aggregate_key: str = "mean",
        query_prompts: str | dict[str, str] | None = None,
        corpus_prompts: str | dict[str, str] | None = None,
    ):
        """
        Initializes the NanoBEIREvaluator.
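
        Example:
            A minimal sketch of a customized setup; the model name, dataset subset, and
            aggregation settings below are illustrative choices, not defaults::

                import numpy as np
                from sentence_transformers import SentenceTransformer
                from sentence_transformers.evaluation import NanoBEIREvaluator

                model = SentenceTransformer("all-MiniLM-L6-v2")
                evaluator = NanoBEIREvaluator(
                    dataset_names=["SciFact", "ArguAna"],
                    aggregate_fn=np.median,
                    aggregate_key="median",
                    show_progress_bar=True,
                )
                results = evaluator(model)
                print(results[evaluator.primary_metric])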

        Args:
            dataset_names (List[str]): The names of the datasets to evaluate on.
            mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
            ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
            accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
            precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
            map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
            show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
            batch_size (int): The batch size for evaluation. Defaults to 32.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate the embeddings to. Defaults to None.
            score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to {SimilarityFunction.COSINE.value: cos_sim, SimilarityFunction.DOT_PRODUCT.value: dot_score}.
            main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
            aggregate_fn (Callable[[list[float]], float]): The function to aggregate the scores. Defaults to np.mean.
            aggregate_key (str): The key to use for the aggregated score. Defaults to "mean".
            query_prompts (str | dict[str, str], optional): The prompts to add to the queries. If a string, will add the same prompt to all queries. If a dict, expects that all datasets in dataset_names are keys.
            corpus_prompts (str | dict[str, str], optional): The prompts to add to the corpus. If a string, will add the same prompt to all corpus documents. If a dict, expects that all datasets in dataset_names are keys.
        """
        super().__init__()
        if dataset_names is None:
            dataset_names = list(dataset_name_to_id.keys())
        self.dataset_names = dataset_names
        self.mrr_at_k = mrr_at_k
        self.ndcg_at_k = ndcg_at_k
        self.accuracy_at_k = accuracy_at_k
        self.precision_recall_at_k = precision_recall_at_k
        self.map_at_k = map_at_k
        self.show_progress_bar = show_progress_bar
        self.batch_size = batch_size
        self.write_csv = write_csv
        self.truncate_dim = truncate_dim
        self.score_functions = score_functions
        self.score_function_names = sorted(list(self.score_functions.keys())) if score_functions else []
        self.main_score_function = main_score_function
        self.aggregate_fn = aggregate_fn
        self.aggregate_key = aggregate_key
        self.query_prompts = query_prompts
        self.corpus_prompts = corpus_prompts

        self.name = f"NanoBEIR_{aggregate_key}"
        if self.truncate_dim:
            self.name += f"_{self.truncate_dim}"

        self._validate_dataset_names()
        self._validate_prompts()

        # Every per-dataset InformationRetrievalEvaluator is created with the same settings.
        ir_evaluator_kwargs = {
            "mrr_at_k": mrr_at_k,
            "ndcg_at_k": ndcg_at_k,
            "accuracy_at_k": accuracy_at_k,
            "precision_recall_at_k": precision_recall_at_k,
            "map_at_k": map_at_k,
            "show_progress_bar": show_progress_bar,
            "batch_size": batch_size,
            "write_csv": write_csv,
            "truncate_dim": truncate_dim,
            "score_functions": score_functions,
            "main_score_function": main_score_function,
        }
        self.evaluators = [self._load_dataset(name, **ir_evaluator_kwargs) for name in self.dataset_names]

        self.csv_file = f"NanoBEIR_evaluation_{aggregate_key}_results.csv"
        self.csv_headers = ["epoch", "steps"]
        self._append_csv_headers(self.score_function_names)

    def _append_csv_headers(self, score_function_names: list[str]) -> None:
        for score_name in score_function_names:
            for k in self.accuracy_at_k:
                self.csv_headers.append(f"{score_name}-Accuracy@{k}")

            for k in self.precision_recall_at_k:
                self.csv_headers.append(f"{score_name}-Precision@{k}")
                self.csv_headers.append(f"{score_name}-Recall@{k}")

            for k in self.mrr_at_k:
                self.csv_headers.append(f"{score_name}-MRR@{k}")

            for k in self.ndcg_at_k:
                self.csv_headers.append(f"{score_name}-NDCG@{k}")

            for k in self.map_at_k:
                self.csv_headers.append(f"{score_name}-MAP@{k}")

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1, *args, **kwargs
    ) -> dict[str, float]:
        per_metric_results = {}
        per_dataset_results = {}
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"
        logger.info(f"NanoBEIR Evaluation of the model on {self.dataset_names} dataset{out_txt}:")

        # If no score functions were given, fall back to the similarity function of the model.
        if self.score_functions is None:
            self.score_functions = {model.similarity_fn_name: model.similarity}
            self.score_function_names = [model.similarity_fn_name]
            self._append_csv_headers(self.score_function_names)

        for evaluator in tqdm(self.evaluators, desc="Evaluating datasets", disable=not self.show_progress_bar):
            logger.info(f"Evaluating {evaluator.name}")
            evaluation = evaluator(model, output_path, epoch, steps)
            for k in evaluation:
                # Keys look like "<dataset>_<score_fn>_<metric>@<k>" (with an extra
                # truncation suffix on the dataset name when truncate_dim is set).
                if self.truncate_dim:
                    dataset, _, metric = k.split("_", maxsplit=2)
                else:
                    dataset, metric = k.split("_", maxsplit=1)
                if metric not in per_metric_results:
                    per_metric_results[metric] = []
                per_dataset_results[dataset + "_" + metric] = evaluation[k]
                per_metric_results[metric].append(evaluation[k])

        # Aggregate each metric over all datasets (mean by default).
        agg_results = {}
        for metric in per_metric_results:
            agg_results[metric] = self.aggregate_fn(per_metric_results[metric])

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                fOut = open(csv_path, mode="w", encoding="utf-8")
                fOut.write(",".join(self.csv_headers))
                fOut.write("\n")
            else:
                fOut = open(csv_path, mode="a", encoding="utf-8")

            output_data = [epoch, steps]
            for name in self.score_function_names:
                for k in self.accuracy_at_k:
                    output_data.append(agg_results[f"{name}_accuracy@{k}"])

                for k in self.precision_recall_at_k:
                    output_data.append(agg_results[f"{name}_precision@{k}"])
                    output_data.append(agg_results[f"{name}_recall@{k}"])

                for k in self.mrr_at_k:
                    output_data.append(agg_results[f"{name}_mrr@{k}"])

                for k in self.ndcg_at_k:
                    output_data.append(agg_results[f"{name}_ndcg@{k}"])

                for k in self.map_at_k:
                    output_data.append(agg_results[f"{name}_map@{k}"])

            fOut.write(",".join(map(str, output_data)))
            fOut.write("\n")
            fOut.close()

        if not self.primary_metric:
            if self.main_score_function is None:
                score_function = max(
                    [(name, agg_results[f"{name}_ndcg@{max(self.ndcg_at_k)}"]) for name in self.score_function_names],
                    key=lambda x: x[1],
                )[0]
                self.primary_metric = f"{score_function}_ndcg@{max(self.ndcg_at_k)}"
            else:
                self.primary_metric = f"{self.main_score_function.value}_ndcg@{max(self.ndcg_at_k)}"

        avg_queries = np.mean([len(evaluator.queries) for evaluator in self.evaluators])
        avg_corpus = np.mean([len(evaluator.corpus) for evaluator in self.evaluators])
        logger.info(f"Average Queries: {avg_queries}")
        logger.info(f"Average Corpus: {avg_corpus}\n")

        for name in self.score_function_names:
            logger.info(f"Aggregated for Score Function: {name}")
            for k in self.accuracy_at_k:
                logger.info("Accuracy@{}: {:.2f}%".format(k, agg_results[f"{name}_accuracy@{k}"] * 100))

            for k in self.precision_recall_at_k:
                logger.info("Precision@{}: {:.2f}%".format(k, agg_results[f"{name}_precision@{k}"] * 100))
                logger.info("Recall@{}: {:.2f}%".format(k, agg_results[f"{name}_recall@{k}"] * 100))

            for k in self.mrr_at_k:
                logger.info("MRR@{}: {:.4f}".format(k, agg_results[f"{name}_mrr@{k}"]))

            for k in self.ndcg_at_k:
                logger.info("NDCG@{}: {:.4f}".format(k, agg_results[f"{name}_ndcg@{k}"]))

        agg_results = self.prefix_name_to_metrics(agg_results, self.name)
        self.store_metrics_in_model_card_data(model, agg_results, epoch, steps)
        per_dataset_results.update(agg_results)
        return per_dataset_results

    def _get_human_readable_name(self, dataset_name: DatasetNameType) -> str:
        human_readable_name = f"Nano{dataset_name_to_human_readable[dataset_name.lower()]}"
        if self.truncate_dim is not None:
            human_readable_name += f"_{self.truncate_dim}"
        return human_readable_name

    def _load_dataset(self, dataset_name: DatasetNameType, **ir_evaluator_kwargs) -> InformationRetrievalEvaluator:
        if not is_datasets_available():
            raise ValueError("datasets is not available. Please install it to use the NanoBEIREvaluator.")
        from datasets import load_dataset

        dataset_path = dataset_name_to_id[dataset_name.lower()]
        corpus = load_dataset(dataset_path, "corpus", split="train")
        queries = load_dataset(dataset_path, "queries", split="train")
        qrels = load_dataset(dataset_path, "qrels", split="train")
        corpus_dict = {sample["_id"]: sample["text"] for sample in corpus if len(sample["text"]) > 0}
        queries_dict = {sample["_id"]: sample["text"] for sample in queries if len(sample["text"]) > 0}
        qrels_dict = {}
        for sample in qrels:
            if sample["query-id"] not in qrels_dict:
                qrels_dict[sample["query-id"]] = set()
            qrels_dict[sample["query-id"]].add(sample["corpus-id"])

        if self.query_prompts is not None:
            ir_evaluator_kwargs["query_prompt"] = self.query_prompts.get(dataset_name, None)
        if self.corpus_prompts is not None:
            ir_evaluator_kwargs["corpus_prompt"] = self.corpus_prompts.get(dataset_name, None)
        human_readable_name = self._get_human_readable_name(dataset_name)
        return InformationRetrievalEvaluator(
            queries=queries_dict,
            corpus=corpus_dict,
            relevant_docs=qrels_dict,
            name=human_readable_name,
            **ir_evaluator_kwargs,
        )

    def _validate_dataset_names(self) -> None:
        if missing_datasets := [
            dataset_name for dataset_name in self.dataset_names if dataset_name.lower() not in dataset_name_to_id
        ]:
            raise ValueError(
                f"Dataset(s) {missing_datasets} not found in the NanoBEIR collection. "
                f"Valid dataset names are: {list(dataset_name_to_id.keys())}"
            )

    def _validate_prompts(self) -> None:
        error_msg = ""
        if self.query_prompts is not None:
            if isinstance(self.query_prompts, str):
                self.query_prompts = {dataset_name: self.query_prompts for dataset_name in self.dataset_names}

            if missing_query_prompts := [
                dataset_name for dataset_name in self.dataset_names if dataset_name not in self.query_prompts
            ]:
                error_msg += f"The following datasets are missing query prompts: {missing_query_prompts}\n"

        if self.corpus_prompts is not None:
            if isinstance(self.corpus_prompts, str):
                self.corpus_prompts = {dataset_name: self.corpus_prompts for dataset_name in self.dataset_names}

            if missing_corpus_prompts := [
                dataset_name for dataset_name in self.dataset_names if dataset_name not in self.corpus_prompts
            ]:
                error_msg += f"The following datasets are missing corpus prompts: {missing_corpus_prompts}\n"

        if error_msg:
            raise ValueError(error_msg.strip())