
    Ng                    R   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d dlZd dlZd d
lmZmZ d dlmZ d dlmZ  d dl!m"Z"m#Z# d dl$m%Z% d dlm&Z& d dl'm(Z( d dlm)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<  e<            rd dl=m>Z>m?Z?m@Z@mAZA  ejB        eC          ZDerd dlEmFZF d dlGmHZH d dlImJZJ  G d de)          ZKg dZLg d ZMd*d#ZNe G d$ d%e                      ZOd+d)ZPdS ),    )annotationsN)Counterdefaultdict)copy)	dataclassfieldfields)Path)python_versionindent)TYPE_CHECKINGAnyLiteral)CardData	ModelCard)dataset_info)
model_info)
EvalResulteval_results_to_model_index)	yaml_dump)nn)tqdm)TrainerCallback)CodeCarbonCallback)make_markdown_table)TrainerControlTrainerState__version__)StaticEmbeddingTransformer)$SentenceTransformerTrainingArguments)fullnameis_accelerate_availableis_datasets_available)DatasetDatasetDictIterableDatasetValue)SentenceEvaluator)SentenceTransformer)SentenceTransformerTrainerc                  @     e Zd Zd fdZddZddZddZddZ xZS )ModelCardCallbacktrainerr-   default_args_dictdict[str, Any]returnNonec                ,   t                                                       || _        || _        d | j        j        j        D             }|r|d         |j        j        _        ||j        j        _        |j        j        	                    d           d S )Nc                <    g | ]}t          |t                    |S  )
isinstancer   ).0callbacks     \/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/model_card.py
<listcomp>z.ModelCardCallback.__init__.<locals>.<listcomp>3   s9     
 
 
($677

 
 
    r   generated_from_trainer)
super__init__r0   r1   callback_handler	callbacksmodelmodel_card_datacode_carbon_callbackadd_tags)selfr0   r1   rB   	__class__s       r;   r@   zModelCardCallback.__init__.   s    !2
 
 L9C
 
 
	
  	NAJ1GM)>07%-%../GHHHHHr=   argsr#   stater   controlr   rC   r,   c                   ddl m}m}m} | j        j        r:|j                            | j        j        |j        j        d          |j        _        | j        j	        r:|j                            | j        j	        |j        j
        d          |j        _
        t          | j        j        t                    r,t          | j        j                                                  }	n| j        j        g}	d}
|
t!          |	          k     rf|	|
         }t          ||||f          r3t#          |d          r#|j        |	vr|	                    |j                   |
dz  }
|
t!          |	          k     f|j                            |	           d S )Nr   )AdaptiveLayerLossMatryoshka2dLossMatryoshkaLosstrainevalloss   )sentence_transformers.lossesrM   rN   rO   r0   train_datasetrD   extract_dataset_metadatatrain_datasetseval_dataseteval_datasetsr8   rR   dictlistvalueslenhasattrappend
set_losses)rG   rI   rJ   rK   rC   kwargsrM   rN   rO   lossesloss_idxrR   s               r;   on_init_endzModelCardCallback.on_init_end>   s    	eddddddddd <% 	383H3a3a*E,A,PRY4 4E!0 <$ 	272G2`2`)5+@+NPV3 3E!/ dl'.. 	)$,+224455FFl'(F V$$(#D4.2CEU!VWW)D&))) IV++di(((MH V$$ 	((00000r=   c                     h d|                                 }fd|                                D             |j        _         fd|                                D             |j        _        d S )N>   do_evaldo_testdo_trainrun_name	hub_token	report_to
eval_delay
eval_steps
output_dir
save_stepslogging_dirlogging_stepssave_strategylogging_strategysave_total_limitgreater_is_betterpush_to_hub_tokensamples_per_labelshow_progress_barlogging_first_stepevaluation_strategymetric_for_best_modelc                $    i | ]\  }}|v	||S r7   r7   )r9   keyvalueignore_keyss      r;   
<dictcomp>z4ModelCardCallback.on_train_begin.<locals>.<dictcomp>   s0     5
 5
 5
%3c>T>TC>T>T>Tr=   c                X    i | ]&\  }}|v	|j         v |j         |         k    #||'S r7   )r1   )r9   r}   r~   r   rG   s      r;   r   z4ModelCardCallback.on_train_begin.<locals>.<dictcomp>   sU     =
 =
 =
U+%%#1G*G*GEUYUkloUpLpLp LpLpLpr=   )to_dictitemsrD   all_hyperparametersnon_default_hyperparameters)rG   rI   rJ   rK   rC   ra   	args_dictr   s   `      @r;   on_train_beginz ModelCardCallback.on_train_beginf   s    
 
 
0 LLNN	5
 5
 5
 5
)2):):5
 5
 5
1=
 =
 =
 =
 =
'oo//=
 =
 =
999r=   metricsdict[str, float]c                j   fdD             }t          |          dk    rd|v r
d|d         i}|j        j        rH|j        j        d         d         |j        k    r'|j        j        d                             |           d S |j        j                            |j        |j        d|           d S )Nc                    i | ]N}|                     d           d                    |                    d          dd                   |         OS )_loss _rS   N)endswithjoinsplit)r9   r}   r   s     r;   r   z1ModelCardCallback.on_evaluate.<locals>.<dictcomp>   sS    lllCVYVbVbcjVkVklSXXciinnQRR01173<lllr=   rS   rR   Validation LossStepEpochr   )r]   rD   training_logsglobal_stepupdater_   epoch)rG   rI   rJ   rK   rC   r   ra   	loss_dicts        `  r;   on_evaluatezModelCardCallback.on_evaluate   s     mlll7lll	y>>Q6Y#6#6*If,=>I!/	%3B7?5CTTT!/3::9EEEEE!/66"[!-       r=   logsc                v   dht          |          z  }|r|j        j        rP|j        j        d         d         |j        k    r/||                                         |j        j        d         d<   d S |j        j                            |j        |j        ||                                         d           d S d S )NrR   r   r   Training Loss)r   r   r   )setrD   r   r   popr_   r   )rG   rI   rJ   rK   rC   r   ra   keyss           r;   on_logzModelCardCallback.on_log   s     x#d))# 	%3)7;FCuGXXXKOPTPXPXPZPZK[%3B7HHH%3::!& % 1)-dhhjj)9     	 	r=   )r0   r-   r1   r2   r3   r4   )
rI   r#   rJ   r   rK   r   rC   r,   r3   r4   )rI   r#   rJ   r   rK   r   rC   r,   r   r   r3   r4   )rI   r#   rJ   r   rK   r   rC   r,   r   r   r3   r4   )	__name__
__module____qualname__r@   rd   r   r   r   __classcell__)rH   s   @r;   r/   r/   -   s        I I I I I I &1 &1 &1 &1P)
 )
 )
 )
V   4       r=   r/   )languagelicenselibrary_nametagsdatasetsr   pipeline_tagwidgetmodel-indexco2_eq_emissions
base_model)rC   r0   eval_results_dictr3   r2   c                     t                      t          t          j        t          j        d} t                      rddlm} || d<   t                      rddlm} || d<   ddl	m} || d<   | S )N)pythonsentence_transformerstransformerstorchr   r   
accelerater   
tokenizers)
r   sentence_transformers_versionr   r    r   r%   r   r&   r   r   )versionsaccelerate_versiondatasets_versiontokenizers_versions       r;   get_versionsr      s     ""!>$0"	 H    4@@@@@@!3 0<<<<<</<<<<<</H\Or=   c                  p   e Zd ZU dZ ee          Zded<   dZded<   dZ	ded<   dZ
ded	<    ee          Zd
ed<    ee          Zd
ed<   dZded<    ed           Zded<   dZded<    edd          Zded<    edd          Zded<    eed          Zded<    eed          Zded<    eed          Zded<    eed          Zd ed!<    eed          Zd
ed"<    edd          Zded#<    eed          Zd
ed$<    edd          Zd%ed&<    eed          Zd'ed(<    edd          Zd)ed*<    eddd+          Zd,ed-<    eedd.          Zd/ed0<    ed1d          Zd2ed3<    ed4d          Z d5ed6<    ed7d          Z!ded8<    ed9d          Z"ded:<    ee#d          Z$d'ed;<    eddd+          Z%d<ed=<   dkd@Z&dldmdBZ'dndEZ(dodGZ)dpdJZ*dqdNZ+drdPZ,dsdtdRZ-dudWZ.dvdZZ/dwd\Z0dxd]Z1dsdyd_Z2dzdaZ3d{dbZ4d|dcZ5dkddZ6d}deZ7df Z8d~dhZ9d}diZ:dsddjZ;dS ) SentenceTransformerModelCardDataa  A dataclass storing data used in the model card.

    Args:
        language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
            e.g. "en" or ["en", "de", "nl"]
        license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
            or "cc-by-nc-sa-4.0"
        model_name (`Optional[str]`): The pretty name of the model, e.g. "SentenceTransformer based on microsoft/mpnet-base".
        model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
            e.g. "tomaarsen/sbert-mpnet-base-allnli".
        train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
        eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
        task_name (`str`): The human-readable task the model is trained on,
            e.g. "semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more".
        tags (`Optional[List[str]]`): A list of tags for the model,
            e.g. ["sentence-transformers", "sentence-similarity", "feature-extraction"].

    .. tip::

        Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
        include it in your model cards.

    Example::

        >>> model = SentenceTransformer(
        ...     "microsoft/mpnet-base",
        ...     model_card_data=SentenceTransformerModelCardData(
        ...         model_id="tomaarsen/sbert-mpnet-base-allnli",
        ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         license="apache-2.0",
        ...         language="en",
        ...     ),
        ... )
    )default_factoryzstr | list[str] | Noner   N
str | Noner   
model_namemodel_idlist[dict[str, str]]rW   rY   zjsemantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and morestr	task_namec                 
    g dS )N)sentence-transformerssentence-similarityzfeature-extractionr7   r7   r=   r;   <lambda>z)SentenceTransformerModelCardData.<lambda>  s     !
 !
 !
 r=   zlist[str] | Noner   
deprecatedzLiteral['deprecated']generate_widget_examplesF)defaultinitr   base_model_revision)r   r   r2   r   r   z.dict[SentenceEvaluator, dict[str, Any]] | Noner   zlist[dict[str, float]]r   r   predict_examplelabel_example_listzCodeCarbonCallback | NonerE   dict[str, str]	citationsz
int | Nonebest_model_step)r   r   reprz!SentenceTransformerTrainer | Noner0   )r   r   r   	list[str]r   Tbool
first_saver   intwidget_stepr   r   r   r   versionzSentenceTransformer | NonerC   r3   r4   c                   | j          }t          | j         t                    r| j         g| _         |                     | j        |          | _        |                     | j        |          | _        | j        rJ| j                            d          dk    r.t          	                    d| j        d           d | _        d S d S d S )N)infer_languages/rS   zThe provided z} model ID should include the organization or user, such as "tomaarsen/mpnet-base-nli-matryoshka". Setting `model_id` to None.)
r   r8   r   validate_datasetsrW   rY   r   countloggerwarning)rG   r   s     r;   __post_init__z.SentenceTransformerModelCardData.__post_init__B  s    "m+dmS)) 	,!]ODM"44T5HZi4jj!33D4FXg3hh= 	!T]0055::NN^ ^ ^ ^   !DMMM	! 	!::r=   r   c                B   g }|D ]}d|vrd|v r|d         |d<   d|v r	 t          |d                   }|j        rg|red|j        v r\|j                            d          }|@t          |t                    r|g}|D ]%}|| j        vr| j                            |           &|j        | j        vr| j                            |j                   n7# t          $ r* t                              d|d         d           |d= Y nw xY w|                    |           |S )Nnameidr   zThe dataset `id` z5 does not exist on the Hub. Setting the `id` to None.)get_dataset_infocardDatagetr8   r   r   r_   r   r   	Exceptionr   r   )rG   dataset_listr   output_dataset_listdatasetinfodataset_languager   s           r;   r   z2SentenceTransformerModelCardData.validate_datasetsR  s    # 	0 	0GW$$7??&-dmGFOw6+GDM::D } C CZ4==X=X+/=+<+<Z+H+H(+7)*:C@@ F4D3E 0,< C C#+4=#@#@$(M$8$8$B$B$B wdm33,,TW555# ! & & &NNrGDMrrr    	&&  &&w////""s   C1DDrb   list[nn.Module]c                   ddi}|D ]'}	 |j         ||j        j        <   # t          $ r Y $w xY wt	          t
                    }|                                D ] \  }}||                             |           !ddfd|                                D             | _        | 	                    d	 d
 |D             D                        d S )NzSentence Transformersa  
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
rb   r   r3   r   c                    t          |           dk    r)d                    | d d                   dz   | d         z   S | d         S )NrS   z, r   z and r   )r]   r   )rb   s    r;   	join_listz>SentenceTransformerModelCardData.set_losses.<locals>.join_list  sC    6{{Qyy--7&*DD!9r=   c                .    i | ]\  }} |          |S r7   r7   )r9   citationrb   r   s      r;   r   z?SentenceTransformerModelCardData.set_losses.<locals>.<dictcomp>  s*    iii:J(F))F++Xiiir=   c                    g | ]}d | S )zloss:r7   r9   rR   s     r;   r<   z?SentenceTransformerModelCardData.set_losses.<locals>.<listcomp>  s    eee$~t~~eeer=   c                (    i | ]}|j         j        |S r7   )rH   r   r   s     r;   r   z?SentenceTransformerModelCardData.set_losses.<locals>.<dictcomp>  s     2d2d2dUY4>3JD2d2d2dr=   )rb   r   r3   r   )
r   rH   r   r   r   r[   r   r_   r   rF   )rG   rb   r   rR   inverted_citationsr   r   s         @r;   r`   z+SentenceTransformerModelCardData.set_lossess  s   # 
&
	  	 	D59]	$.122   (..'oo// 	6 	6ND(x(//5555	 	 	 	
 jiiiN`NfNfNhNhiiiee2d2d]c2d2d2deeefffffs    
--stepc                    || _         d S N)r   )rG   r   s     r;   set_best_model_stepz4SentenceTransformerModelCardData.set_best_model_step  s    #r=   r   Dataset | DatasetDictc                f   t          |t                    rd S t          |t                    rt          |          }g | _        t          t          j        t          |	                                          d                    }d}t          |                                ddd          D ]\  }}d	 ||         j                                        D             }||                             |          }t          |          }|d
k    r`i }	t          |                    t          j        t%          |          t'          ||                                        D ]3\  }
}t)          d |                                D                       |	|
<   4t+          t-          |	                                d            \  }}|d |         t          ||d          d d d                   }}|D ]?}
d ||
                                         D             }t          |          dk     r|r|                                }d ||                                         D             }t          |          dk    r|                    |           n|                    |d                    t          |          dk     r|t          |          dk     r| j                            |d
         t          j        |dd          t          |          dz
            d           |d d         | _        Ad S )N)r      )k  zComputing widget examplesexampleF)descunitleavec                T    g | ]%\  }}t          |t                    r|j        d v #|&S )>   stringlarge_string)r8   r*   dtype)r9   columnfeatures      r;   r<   zHSentenceTransformerModelCardData.set_widget_examples.<locals>.<listcomp>  sH       #FGgu-- 3:-C]2]2] 2]2]2]r=   r   c              3  p   K   | ]1\  }}|d k    |                     d           t          |          V  2dS )dataset_name_prompt_lengthN)r   r]   r9   r}   r~   s      r;   	<genexpr>zGSentenceTransformerModelCardData.set_widget_examples.<locals>.<genexpr>  sU       # #"Un,,S\\BR5S5S, JJ,,,,# #r=   c                    | d         S )NrS   r7   )xs    r;   r   zFSentenceTransformerModelCardData.set_widget_examples.<locals>.<lambda>  s
    AaD r=   r}   r   c                N    g | ]"\  }}|d k    |                     d           |#S r  r  r   r9   r}   sentences      r;   r<   zHSentenceTransformerModelCardData.set_widget_examples.<locals>.<listcomp>  sC       %Xn,,S\\BR5S5S, ,,,r=      c                N    g | ]"\  }}|d k    |                     d           |#S r  r  r  s      r;   r<   zHSentenceTransformerModelCardData.set_widget_examples.<locals>.<listcomp>  sC     % % %)C.00FV9W9W0 !000r=   rS   )source_sentence	sentences   )r8   r)   r'   r(   r   r   randomchoicesr[   r   r   r   featuresselect_columnsr]   	enumerateselectsamplerangeminsumzipsortedr   extendr_   r   )rG   r   dataset_namesnum_samples_to_checkr  num_samplescolumnsstr_datasetdataset_sizelengthsidxr'  indicesr   target_indicesbackup_indicesr  
backup_idxbackup_samples                      r;   set_widget_examplesz4SentenceTransformerModelCardData.set_widget_examples  s   g// 	Fgw'' 	3!'222GtGLLNN/C/Cq I I IJJ#)-!!(C)[`*
 *
 *
 8	5 8	5%L+
 '.|'<'E'K'K'M'M  G
 ",/>>wGGK{++Lq  G(""6=|1D1DL`bnHoHo#p#p#pqq     V  # # #&,llnn# # #     fW]]__..IIIJJGQ-4\k\-BDQ\Q]Q]I^_c_cac_cIdDeDeNN & 5 5 )4S)9)?)?)A)A  	
 )nnq((^(!/!3!3!5!5J% %-8-D-J-J-L-L% % %M
 =))Q..!((7777 "((q)9::: )nnq((^( y>>A%%""(1!6=QZ[\[][]Q^befobpbpstbtCuCuCuvv   (1!}$$75;8	5 8	5r=   	evaluatorr+   r   c                J   ddl m} t          |          | j        |<   t	          |d          r|j        xrt          ||          rd |j        D             nt          t                    rg| j	        d}d}n"| j	        j
        j        }| j	        j
        j        }fd|                                D             }| j        r9| j        d         d         |k    r"| j        d                             |           d S | j                            ||d|           d S d S d S )	Nr   )SequentialEvaluatorprimary_metricc                    g | ]	}|j         
S r7   )r?  )r9   sub_evaluators     r;   r<   zKSentenceTransformerModelCardData.set_evaluation_metrics.<locals>.<listcomp>  s    "j"j"jM=#?"j"j"jr=   c                $    i | ]\  }}|v 	||S r7   r7   )r9   r}   r~   primary_metricss      r;   r   zKSentenceTransformerModelCardData.set_evaluation_metrics.<locals>.<dictcomp>  s+    #k#k#k:3TW[jTjTjCTjTjTjr=   r   r   r   ) sentence_transformers.evaluationr>  r   r   r^   r?  r8   
evaluatorsr   r0   rJ   r   r   r   r   r   r_   )rG   r<  r   r>  r   r   training_log_metricsrC  s          @r;   set_evaluation_metricsz7SentenceTransformerModelCardData.set_evaluation_metrics  sz   HHHHHH,0MMy) 9.// 		H`5`_ 	)%899 4"j"jU^Ui"j"j"jOS11 4#2"3|#|)5*0#k#k#k#k#k#k#k ! 	d&8&<V&D&L&L"2&--.BCCCCC"))!& $  /    #	 	 	 	r=   r'   c                    d}t          t                    }t                      }|D ]}|d         }|d         }||vrZ||                             dt	          |           d           t          ||                   |k    r|                    |           t          |           j        k    r n fd|                                D              _	        d S )Nr   textlabelz<li>z</li>c                    g | ]V\  }}j         j        r't          |t                    rj         j        |         n|d d                    |          z   dz   dWS )z<ul> z</ul>)LabelExamples)rC   labelsr8   r   r   )r9   rJ  example_setrG   s      r;   r<   zGSentenceTransformerModelCardData.set_label_examples.<locals>.<listcomp>  s}     #
 #
 #

 #{ 6:Z5Fl:V[]`KaKal*511gl"RWW[%9%99GC #
 #
 #
r=   )
r   r[   r   r_   r   r]   addnum_classesr   r   )rG   r   num_examples_per_labelexamplesfinished_labelsr'  rI  rJ  s   `       r;   set_label_examplesz3SentenceTransformerModelCardData.set_label_examples  s    !"t$$%% 	 	F&>D7OEO++&&'?d4jj'?'?'?@@@x''+AAA#''...?##t'777 8#
 #
 #
 #

 '/nn&6&6#
 #
 #
r=   r  c                    t          |t                    r  fd|                                D             S |rt          j        d|          rd }|p|j        j        t          |j                  d}|j        j	        r2|j        |j        j	        v r|j        j	        |j                 j
        |d<   |j        x}rt          |                                          d         }|                    d          rtd|v rp|t          d          d                              d          }|d         |d<   |d	                             d
          d         x}rt          |          dk    r||d<   |gS )Nc                N    g | ]!\  }}                     ||           D ]}|"S ))r  )infer_datasets)r9   r  sub_datasetr   rG   s       r;   r<   zCSentenceTransformerModelCardData.infer_datasets.<locals>.<listcomp>  sW       -L+#22;\2ZZ      r=   z_dataset_\d+)r   r   sizer   zhf://datasets/@r   rS   r   (   revision)r8   r(   r   rematchr   r  r   r   splitsnum_examplesdownload_checksumsr[   r   
startswithr]   )rG   r   r  dataset_output	checksumssourcesource_partsr^  s   `       r;   rY  z/SentenceTransformerModelCardData.infer_datasets  s   g{++ 	   18     	 BH_lCC 	 L !=GL$=''
 
 < 	U7=GL4G#G#G%,\%8%G%TN6"  229 	:)..**++A.F  !122 :sf}}%c*:&;&;&=&=>DDSII'3At$ ,Q 5 5c : :1 ==H :3x==TVCVCV19N:.r=    Dataset | IterableDataset | Noner   rR   'dict[str, nn.Module] | nn.Module | Nonec           	        |si S d|vr't          |t                    rt          |          |d<   d |j        D             |d<   i |d<   t          |t                    r|j        D ]}|dd         |         d         }t          |t                    r| j                                      }t          |t                    r5d|v r1|d                             d	
          	                                }d}nd D             }d}dt          t          |          d           d| t          t          |          t          |          z  d           d| t          t          |          d           d| dd|d         |<   &t          |t          t          f          r9t                    dfdt!                    D             d|d         |<   {t          |t"                    rudt          t                    d          t          t                    t                    z  d          t          t                    d          dd|d         |<   t          |t$                    rt          d D                       t                    d	k    r"ddt          |           did|d         |<   hdt                     dt                    t                    z  ddt                     ddd|d         |<   t'          |          i d|d         |<   d9dddid |d                                         D             dd ifd!|d                                         D             g}	t+          t-          |	                              d"d#          d$          |d%<   |dd&         |d'<   t          |d'         t%          |d'                   d                            }
g }t1          |
          D ]}i }|j        D ]}|d'         |         |         }t          |t$                    r5t          |          d(k    r"t	          |dd(                   dd)         d*z   }t          |t                    r t          |          dk    r|dd         d+z   }t	          |                              d,d-          }d.| d/||<   |                    |           t+          t-          |                              d"d#          d$          |d0<   d1t'          |          i|d2<   t5          |d3          rh|                                }	 t9          j        |d45          }n# t<          $ r t	          |          }Y nw xY wt+          d6| d7d$          |d2         d8<   |S ):a  
        Given a dataset, compute the following:
        * Dataset Size
        * Dataset Columns
        * Dataset Stats
            - Strings: min, mean, max word count/token length
            - Integers: Counter() instance
            - Floats: min, mean, max range
            - List: number of elements or min, mean, max number of elements
        * 3 Example samples
        * Loss function name
            - Loss function config
        r[  c                    g | ]}d | d	S )<code></code>r7   )r9   r  s     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>F  s$    "_"_"_#;F#;#;#;"_"_"_r=   r1  statsNr  r   attention_maskrS   )dimtokensc                ,    g | ]}t          |          S r7   r]   )r9   r  s     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>R  s    "L"L"LX3x=="L"L"Lr=   
charactersr
     r   )r)  meanmax)r  datar   c                v    i | ]5}|t                    d k    rdnd |         t                    z  d6S )rS   ~rL  z.2%rt  )r9   r}   counter
subsections     r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>`  s`     ! ! ! #  3w<<!+;+;CC!iWS\TWXbTcTcEc!i!i!i! ! !r=   floatc                ,    g | ]}t          |          S r7   rt  )r9   lsts     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>o  s    &F&F&FCs3xx&F&F&Fr=   r[   z	 elementsz.2fry  rZ   c                p    dd                     d |                                 D                       z   dz   S )Nz<ul><li>z	</li><li>c              3  *   K   | ]\  }}| d | V  dS )z: Nr7   r  s      r;   r  zaSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list.<locals>.<genexpr>  s5      4f4f:3PU5F5Fu5F5F4f4f4f4f4f4fr=   z
</li></ul>)r   r   ry  s    r;   to_html_listzNSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list  s=    !K$4$44f4fY]YcYcYeYe4f4f4f$f$ffiuuur=   rL  typec                &    i | ]\  }}||d          S )r  r7   r  s      r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>  s"    ddd
UU7^dddr=   detailsc                :    i | ]\  }}| |d                    S r  r7   )r9   r}   r~   r  s      r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>  s-    "t"t"t
U3U6](C(C"t"t"tr=   -:|--|z  stats_tabler   rT  r  r   z, ...]z...
z<br>rm  rn  examples_tabler$   rR   get_config_dictr  r   z```json
z
```config_code)ry  rZ   )r8   r'   r]   column_namesr   rC   tokenizerZ   r*  tolistroundr)  rx  r   r   r   r,  r~  r[   r$   r   r   r   replacer(  r_   r^   r  jsondumps	TypeError)rG   r   r   rR   r  first	tokenizedr4  suffixstats_linesr0  examples_lines
sample_idxr1  r~   config
str_configr|  r}  r  s                    @@@r;   compute_dataset_metricsz8SentenceTransformerModelCardData.compute_dataset_metrics.  s   &  	I%%*Wg*F*F%#&w<<L "_"_'J^"_"_"_Y "Wgw'' U	u!. 8[ 8[$UdU^F3
"1eS)) 5[ $
 3 3J ? ?I!)T22 .7G97T7T"+,<"="A"Aa"A"H"H"O"O"Q"Q!)"L"L"L"L"L!-!)&+CLL!&<&<#G#Gv#G#G',S\\CLL-H!'L'L$W$Wv$W$W&+CLL!&<&<#G#Gv#G#G! !5 5L)&11  T{33 %[%j11G!&! ! ! ! !'-g! ! !5 5L)&11  u-- [!(#(Z!#<#<$)#j//C
OO*KQ$O$O#(Z!#<#<! !5 5L)&11  t,, [%&F&F:&F&F&FGGG7||q((%+ &3u::(@(@(@%9 9W-f55 &,*-g,,'A'A'A+.w<<#g,,+F(U(U(U(U*-g,,'A'A'A% %9 9W-f55 ?GuooWY4Z4ZL)&11v v v v VeddlSZF[FaFaFcFcdddeYu"t"t"t"tVbcjVkVqVqVsVs"t"t"tuK +11D[1Q1Q1Y1YZ_af1g1gim*n*nL''.rr{L$l:6tL<T7U7UVW7XYZZKN#K00 / /
%2 
> 
>F(4V<ZHE!%.. ?3u::>> #E"1"Iss 3h >!%-- 5#e**t2C2C %eteu 4JJ..tV<<E&=u&=&=&=GFOO%%g....-34G4W4W4_4_`egl4m4mos-t-tL)*  
V 4*++ 	^))++F)!Zq999

 ) ) ) [[


)289VZ9V9V9VX\2]2]L /s   /V V"!V"dataset_typeLiteral['train', 'eval']c           	         |r|rt          |t                    r t          |          t          |          k    s(t          |t                    r9t          |          dk    r&t                              d| d| d| d           g }|s                     |          }t          |t                    rB fdt          |                                |	                                |          D             }n( 
                    ||d          j        j                  g}|dk    r3t          d	 |D                       }|r                     d
|                                 |          S )NrS   zThe number of `z?_datasets` in the model card data does not match the number of z1 datasets in the Trainer. Removing the provided `z$_datasets` from the model card data.c           
         g | ]X\  }}}                     ||t          j        j        t                    rj        j        |         nj        j                  YS r7   )r  r8   r0   rR   rZ   )r9   r  dataset_valuer   rG   s       r;   r<   zMSentenceTransformerModelCardData.extract_dataset_metadata.<locals>.<listcomp>  sv     	$ 	$ 	$ Bm\ 00%$;EdlFWY];^;^u),77dhdpdu 	$ 	$ 	$r=   r   rP   c                :    g | ]}|                     d d          S )r[  r   )r   )r9   metadatas     r;   r<   zMSentenceTransformerModelCardData.extract_dataset_metadata.<locals>.<listcomp>  s&    'a'a'aHVQ(?(?'a'a'ar=   zdataset_size:)r8   r(   r]   r'   r   r   rY  r+  r   r\   r  r0   rR   r*  rF   r   )rG   r   dataset_metadatar  num_training_sampless   `    r;   rV   z9SentenceTransformerModelCardData.extract_dataset_metadata  s     	s &G[11&69:J6K6KsSZ||6[6[w00 7\589I5J5Ja5O5Oal a as a a.:a a a   $& # @#'#6#6w#?#? ';// s	$ 	$ 	$ 	$ FI(8(8:JF F	$ 	$ 	$   %)$@$@JZ[\J]_c_k_p$q$q#r  7""#&'a'aP`'a'a'a#b#b # FD.BDDEEE%%&6777r=   r,   c                    || _         d S r   )rC   )rG   rC   s     r;   register_modelz/SentenceTransformerModelCardData.register_model  s    


r=   c                    || _         d S r   )r   )rG   r   s     r;   set_model_idz-SentenceTransformerModelCardData.set_model_id  s     r=   r^  c                    	 t          |          }n# t          $ r Y dS w xY w|j        | _        ||dk    r|j        }|| _        dS )NFmainT)get_model_infor   r   r   shar   )rG   r   r^  r   s       r;   set_base_modelz/SentenceTransformerModelCardData.set_base_model  sg    	'11JJ 	 	 	55	 %-x611!~H#+ ts    
  str | list[str]c                D    t          |t                    r|g}|| _        d S r   )r8   r   r   )rG   r   s     r;   set_languagez-SentenceTransformerModelCardData.set_language  s&    h$$ 	" zH r=   c                    || _         d S r   )r   )rG   r   s     r;   set_licensez,SentenceTransformerModelCardData.set_license  s    r=   c                    t          |t                    r|g}|D ]%}|| j        vr| j                            |           &d S r   )r8   r   r   r_   )rG   r   tags      r;   rF   z)SentenceTransformerModelCardData.add_tags  sX    dC   	6D 	& 	&C$)##	  %%%	& 	&r=   c                d   t          | j        d         t                    r| j        d         j        j        j        }t          |          }d                    |j        dd                    g}|j	        
                    d          |fdt          dt                              D             z  }|D ]}|                     |          r d S d S t          | j        d         t                    r9| j        d         j        r)|                     | j        d         j                   d S d S d S )Nr   r   r   c                    g | ]@}d                      d|                   dz   d                      |d                   z   AS )r   Nr   )r   )r9   r5  ra  s     r;   r<   zJSentenceTransformerModelCardData.try_to_set_base_model.<locals>.<listcomp>  sX     $ $ $JM&&,sxxstt/E/EE$ $ $r=   rS   )r8   rC   r"   
auto_modelr  _name_or_pathr
   r   partsr   r   r(  r]   r  r!   r   )rG   r   base_model_pathcandidate_model_idsr   ra  s        @r;   try_to_set_base_modelz6SentenceTransformerModelCardData.try_to_set_base_model  s\   djm[11 	>A18FJ":..O $'88O,A"##,F#G#G"H
 %)//44F $ $ $ $QVWXZ]^dZeZeQfQf$ $ $  0  &&x00 EE  
177 	>z!}' >##DJqM$<=====	> 	>> >r=   c                ^   g }i }g }| j                                         D ]w\  }}t          |dd          t          |dd          rt          fd|                                D                       rTfd|                                D             }r2                    dz             rt                    dz   d         ddfd|                                D             }fd|                                D             }|j        t          |dd          |                    t          |          |d           |
                    fd|                                D                        |                    |           yg }|D ]}d |d         D             }	t          |	          }
|D ]}t          d |d         D                       }|d         |d         k    r|
|k    r|d         |d         k    r|d         D ];}d|v r|                    d          ||d         <   |	|d                  ||d         <   <t          |d         t                    s|d         g|d<   |d                             |d                     n|                    |           |D ];}t!          |                    d                                        dd          |d<   <|t          |                                          t%          | j        |          dS )au  Format the evaluation metrics for the model card.

        The following keys will be returned:
        - eval_metrics: A list of dictionaries containing the class name, description, dataset name, and a markdown table
          This is used to display the evaluation metrics in the model card.
        - metrics: A list of all metric keys. This is used in the model card metadata.
        - model-index: A list of dictionaries containing the task name, task type, dataset type, dataset name, metric name,
          metric type, and metric value. This is used to display the evaluation metrics in the model card metadata.
        r   Nr?  c              3  H   K   | ]}|                     d z             V  dS )r   N)rd  )r9   r}   r   s     r;   r  zGSentenceTransformerModelCardData.format_eval_metrics.<locals>.<genexpr>  s3      QQ3CNN4#:66QQQQQQr=   c                L    i | ] \  }}|t                    d z   d         |!S )rS   Nrt  )r9   r}   r~   r   s      r;   r   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<dictcomp>  s2    YYY:33s4yy1}/YYYr=   r   rS   r~   r   r3   c                r    	 t          | d          r|                                 S n# t          $ r Y nw xY w| S )z^Try to convert a value from a Numpy or Torch scalar to pure Python, if not already pure Pythonr  )r^   itemr   )r~   s    r;   try_to_pure_pythonzPSentenceTransformerModelCardData.format_eval_metrics.<locals>.try_to_pure_python  sQ    ug.. ,$zz||+,    Ds   #' 
44c                .    i | ]\  }}| |          S r7   r7   )r9   r}   r~   r  s      r;   r   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<dictcomp>'  s+    XXX*#us..u55XXXr=   c                    g | ]>\  }}|k    rd | d n||k    rd t          |d           d nt          |d          d?S )**r  Metricr*   )r  )r9   
metric_keymetric_valuer?  s      r;   r<   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<listcomp>)  s        -J 6@>5Q5Q1:1111Wa!^33 =%a"8"8<<<<|Q//	   r=   )
class_namedescriptionr  table_linesc                n   g | ]\  }}t          |t          t          f          !t                                                              d d          pdr*                    dd                               dd           nd|                    dd                                           ||          S )r   -unknownr   Unknown)r   	task_typer  r  metric_namemetric_typer  )r8   r   r~  r   lowerr  title)r9   r  r  r  r  s      r;   r<   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<listcomp>?  s        1
L!,e=="-"-"3"3"5"5"="=c3"G"G%1%>YYe%t\%9%9#s%C%C%K%KCQT%U%U%Ukt$.$6$6sC$@$@$F$F$H$H$.%1    r=   c                ,    i | ]}|d          |d         S r  r7   r9   lines     r;   r   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<dictcomp>R  s"    "h"h"hT4>4="h"h"hr=   r  c              3  &   K   | ]}|d          V  dS )r  Nr7   r  s     r;   r  zGSentenceTransformerModelCardData.format_eval_metrics.<locals>.<genexpr>U  s&      1p1pT$x.1p1p1p1p1p1pr=   r  r  r*   r  r  r  table)eval_metricsr   r   )r~   r   r3   r   )r   r   getattrallr   rd  r]   r  r_   r$   r-  r   r   r   r8   r[   r   r  r   r   )rG   r  all_metricseval_resultsr<  r   r  grouped_eval_metricseval_metriceval_metric_mappingeval_metric_metricsgrouped_eval_metricgrouped_eval_metric_metricsr  r  r  r   r?  r  s                 @@@@@r;   format_eval_metricsz4SentenceTransformerModelCardData.format_eval_metrics	  s    "&"8">">"@"@ 7	( 7	(Iw9fd33D$Y0@$GGN EQQQQ',,..QQQQQ EYYYYYYY! En&?&?s
&K&K E%3CIIMOO%DN    YXXXXXXG    18  K $/K"9fd;;L"*9"5"5#.$0#.	         5<MMOO     w''''  "' 	9 	9K"h"h[YfMg"h"h"h"%&9":":'; 9 9#.11p1pM`anMo1p1p1p.p.p+-1D\1RRR+/JJJ#N37J>7ZZZ !4M B ` `"d??HLQXHYHYD!4^!DE<OPTU]P^<_[899%&9.&I4PP d?RSa?b>c+N;'7>>{>?Z[[[E$++K888#7 	 	+>?R?V?VWd?e?e+f+f+n+nu, ,((
 1K,,..//6tUU
 
 	
r=   c                    g  j         D ]2}|                                D ]}|vr                    |           3d	fd}t          |           fd j         D             }t	          |          }|d|v dS )
Nr}   r   r3   c                    | dk    rdS | dk    rdS | dk    rdS | dk    rdS |                      d	          rd
S                     |           dz   S )Nr   r   r   rS   r   rv  r   r   rR   r  r  )r   index)r}   eval_lines_keyss    r;   sort_metricszKSentenceTransformerModelCardData.format_training_logs.<locals>.sort_metrics}  sr    g~~qf}}qo%%q'''q||F## q"((--11r=   r  c                0    g | ]fd D             S )c                    i | ]J}|d          j         k    r d|v rt          |         d          nd dn                    |d          KS )r   r  r  r  )r   r  r   )r9   r}   r  rG   s     r;   r   zTSentenceTransformerModelCardData.format_training_logs.<locals>.<listcomp>.<dictcomp>  s}         <4#777 J%S	1---#IIIIXXc3''  r=   r7   )r9   r  rG   sorted_eval_lines_keyss    @r;   r<   zISentenceTransformerModelCardData.format_training_logs.<locals>.<listcomp>  sT     
 
 
      2	  
 
 
r=   r  )
eval_linesexplain_bold_in_eval)r}   r   r3   r   )r   r   r_   r,  r   )rG   linesr}   r  r   r  r  r  s   `     @@r;   format_training_logsz5SentenceTransformerModelCardData.format_training_logst  s    ' 	0 	0Ezz|| 0 0o--#**3///0
	2 	2 	2 	2 	2 	2 "(\!J!J!J
 
 
 
 
 *
 
 
 )77
$$(J$6
 
 	
r=   1dict[Literal['co2_eq_emissions'], dict[str, Any]]c                *   | j         j                                        }dt          |j                  dz  t          |j                  dd|j        dk    |j        |j        t          |j
        dz  d          di}|j        r|j        |d         d	<   |S )
Nr   r  
codecarbonzfine-tuningYi  r   )	emissionsenergy_consumedrg  training_typeon_cloud	cpu_modelram_total_size
hours_usedhardware_used)rE   tracker_prepare_emissions_datar~  r  r  r   r  r  r  duration	gpu_model)rG   emissions_dataresultss      r;   get_codecarbon_dataz4SentenceTransformerModelCardData.get_codecarbon_data  s    2:RRTT">#;<<tC#()G#H#H&!.*3s:+5"0"?#N$;d$BAFF
! 
!
 # 	T;I;SG&'8r=   c                     j         s6 j        / j        j        p j        j        x}r                     |            j        r- j        s&	                                   n# t          $ r Y nw xY w j	        s j        rd j          _	        nd _	         fdt                     D             } j        rZ	 |                                                                n1# t          $ r$}t                              d|            |d }~ww xY w j        r]	 |                                                                n4# t          $ r'}t                              d|            Y d }~nd }~ww xY wt%           j                  dk    |d<    j        rD j        j        r8 j        j        j        '|                                                                 j                                        |d<    j                                        |d	<   t5           j                  |d
<    j        j        rYddddd                     j        j         j        j                            dd                                                    |d<   nd|d<   d _        t>          D ]}|                     |d            |S )NzSentenceTransformer based on r,   c                F    i | ]}|j         t          |j                   S r7   )r   r  )r9   r   rG   s     r;   r   z<SentenceTransformerModelCardData.to_dict.<locals>.<dictcomp>  s)    VVVej'$
";";VVVr=   z+Error while formatting evaluation metrics: z&Error while formatting training logs: d   hide_eval_linesmodel_max_lengthoutput_dimensionalitymodel_stringzCosine SimilarityzDot ProductzEuclidean DistancezManhattan Distance)cosinedot	euclidean	manhattanr   r   similarity_fn_nameF)!r   r0   rX   rU   r;  r   r   r  r   r   r	   r   r   r  r   r   r   r  r]   rE   r  _start_timer  rC   get_max_seq_length get_sentence_embedding_dimensionr   r  r   r  r  IGNORED_FIELDSr   )rG   r   
super_dictexcr}   s   `    r;   r   z(SentenceTransformerModelCardData.to_dict  s;    	.( L5S9SS ) $$W--- ? 	4? 	**,,,,     	8 8"S$/"S"S"7VVVVPTVVV
 ! 	!!$":":"<"<====   RSRRSSS	
  	OO!!$";";"="=>>>> O O OMMMNNNNNNNNO ),D,>(?(?#(E
$% %	:)1	: )1=Id6688999 *.)F)F)H)H
%&.2j.Y.Y.[.[
*+%(__
>":( 	C-$11	0 0
 c$*/1N1V1VWZ\_1`1`1f1f1h1hii +,, 0CJ+,! 	& 	&CNN3%%%%sB   A# #
A0/A0;'C# #
D-DD'E 
E5E00E5c                    t          d |                                                                 D             d|                                          S )Nc                8    i | ]\  }}|t           v |d g fv||S r   )YAML_FIELDSr  s      r;   r   z<SentenceTransformerModelCardData.to_yaml.<locals>.<dictcomp>  s:    sssJCC;DVDV[`imoqhr[r[rS%[r[r[rr=   F)	sort_keys
line_break)r   r   r   strip)rG   r"  s     r;   to_yamlz(SentenceTransformerModelCardData.to_yaml  sO    ss$,,..*>*>*@*@sss!
 
 
 %''		r=   )r3   r4   )T)r   r   r3   r4   )rb   r   r3   r4   )r   r   r3   r4   )r   r   r3   r4   )r<  r+   r   r2   r3   r4   )r   r'   r3   r4   r   )r   r   r  r   r3   r   )r   ri  r   r2   rR   rj  r3   r   )r   r   r  r  r3   r2   )rC   r,   r3   r4   )r   r   r3   r4   )r   r   r^  r   r3   r4   )r   r  r3   r4   )r   r   r3   r4   )r   r  r3   r4   r3   r2   )r3   r  )r3   r   )<r   r   r   __doc__r   r[   r   __annotations__r   r   r   rW   rY   r   r   r   r   r   rZ   r   r   r   r   r   r   r   rE   r   r   r0   r   r   r   r   r   r   r   rC   r   r   r`   r   r;  rG  rV  rY  r  rV   r  r  r  r  r  rF   r  r  r  r  r   r$  r7   r=   r;   r   r      s        $ $N (-uT'B'B'BHBBBBG!J!!!!H+05+F+F+FNFFFF*/%*E*E*EMEEEEt      #U
 
  D     7CBBBB #U4e<<<J<<<<&+eDu&E&E&EEEEE27%SX2Y2Y2YYYYY*/%5*Q*Q*QQQQQHM^binHoHoHooooo,1E$U,S,S,SMSSSS#(5E#J#J#JFJJJJ"'%5"A"A"AOAAAA/4uTPU/V/V/VVVVV6;eDu6U6U6UUUUU %d G G GIGGGG"'%5"A"A"AOAAAA16t%V[1\1\1\G\\\\%5uMMMHMMMM uT666J6666uRe444K4444 &;%HHHLHHHH&=EJJJLJJJJ#eLuMMMGMMMM ).dU(S(S(SESSSS! ! ! ! # # # # #Bg g g g>$ $ $ $D5 D5 D5 D5L   >
 
 
 
*         >{ { { {z%8 %8 %8 %8N   ! ! ! !
 
 
 
 
! ! ! !
   & & & &> > > >,i
 i
 i
 i
V$
 $
 $
L   &D D D DL      r=   r   rC   r,   r   c                    t          t                    j        dz  }t          j        | j        |d          }|j        S )Nzmodel_card_template.mdu   🤗)	card_datatemplate_pathhf_emoji)r
   __file__parentr   from_templaterD   content)rC   r*  
model_cards      r;   generate_model_cardr1    s;    NN),DDM(53HXepvwwwJr=   r%  )rC   r,   r3   r   )Q
__future__r   r  loggingr!  r_  collectionsr   r   r   dataclassesr   r   r	   pathlibr
   platformr   textwrapr   typingr   r   r   r   r   huggingface_hubr   r   r   r   r   r  huggingface_hub.repocard_datar   r   huggingface_hub.utilsr   r   tqdm.autonotebookr   r   transformers.integrationsr   transformers.modelcardr   transformers.trainer_callbackr   r   r   r    r   sentence_transformers.modelsr!   r"   #sentence_transformers.training_argsr#   sentence_transformers.utilr$   r%   r&   r   r'   r(   r)   r*   	getLoggerr   r   2sentence_transformers.evaluation.SentenceEvaluatorr+   )sentence_transformers.SentenceTransformerr,   sentence_transformers.trainerr-   r/   r   r  r   r   r1  r7   r=   r;   <module>rH     sg   " " " " " "    				 , , , , , , , ,       0 0 0 0 0 0 0 0 0 0       # # # # # #       . . . . . . . . . .      / / / / / / / / < < < < < < 8 8 8 8 8 8 Q Q Q Q Q Q Q Q + + + + + +       " " " " " " ( ( ( ( ( ( 8 8 8 8 8 8 6 6 6 6 6 6 F F F F F F F F N N N N N N E E E E E E E E T T T T T T _ _ _ _ _ _ _ _ _ _ FEEEEEEEEEEEE		8	$	$ ITTTTTTMMMMMMHHHHHHU U U U U U U Up   ;::   , L L L L Lx L L L^     r=   