
    Ngy                       d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlZd dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+m,Z, ddl-m.Z. ddl/m0Z0  e,            rd dl1m2Z2m3Z3  ej4        e5          Z6erd dl7m8Z8 d dl9m:Z:  G d de          Z; G d de          Z< G d de          Z= G d d          Z>dS )    )annotationsN)Iterable)Path)TYPE_CHECKINGAnyCallable)version)Tensornn)	Optimizer)LambdaLR)
DataLoader)trange)TrainerCallbackTrainerControlTrainerState)NoDuplicatesDataLoader)SentenceLabelDataset)BatchSamplersMultiDatasetBatchSamplers$SentenceTransformerTrainingArguments)batch_to_devicefullnameis_datasets_available   )SentenceEvaluator)ModelCardTemplate)DatasetDatasetDict)InputExample)SentenceTransformerc                  <     e Zd ZdZd fd
ZddZddZddZ xZS )SaveModelCallbackaa  A Callback to save the model to the `output_dir`.

    There are two cases:
    1. save_best_model is True and evaluator is defined:
        We save on evaluate, but only if the new model is better than the currently saved one
        according to the evaluator.
    2. If evaluator is not defined:
        We save after the model has been trained.
    
output_dirstr	evaluatorSentenceEvaluator | Nonesave_best_modelboolreturnNonec                    t                                                       || _        || _        || _        d | _        d S N)super__init__r$   r&   r(   best_metric)selfr$   r&   r(   	__class__s       [/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/fit_mixin.pyr/   zSaveModelCallback.__init__7   s=    $".    
new_metricfloatc                Z    t          | j        dd          r|| j        k    S || j        k     S )Ngreater_is_betterT)getattrr&   r0   )r1   r5   s     r3   	is_betterzSaveModelCallback.is_better>   s5    4>#6== 	1 000D,,,r4   argsr   stater   controlr   metricsdict[str, Any]modelr!   c                .   | j         | j        rt          | j         dd          }|                                D ][\  }}	|                    |          r=| j        |                     |	          r!|	| _        |                    | j                   Xd S d S d S Nprimary_metricr&   )	r&   r(   r9   itemsendswithr0   r:   saver$   )
r1   r;   r<   r=   r>   r@   kwargs
metric_keykeyvalues
             r3   on_evaluatezSaveModelCallback.on_evaluateC   s     >%$*>% 1A;OOJ%mmoo 4 4
U<<
++ 4'/4>>%3H3H/+0(

4?333 &%%%4 4r4   c                L    | j         |                    | j                   d S d S r-   )r&   rF   r$   )r1   r;   r<   r=   r@   rG   s         r3   on_train_endzSaveModelCallback.on_train_endT   s.     >!JJt''''' "!r4   )r$   r%   r&   r'   r(   r)   r*   r+   )r5   r6   r*   r)   )r;   r   r<   r   r=   r   r>   r?   r@   r!   r*   r+   
r;   r   r<   r   r=   r   r@   r!   r*   r+   )	__name__
__module____qualname____doc__r/   r:   rK   rM   __classcell__r2   s   @r3   r#   r#   ,   s                    - - - -
4 4 4 4"	( 	( 	( 	( 	( 	( 	( 	(r4   r#   c                  ,     e Zd ZdZd fdZddZ xZS )EvaluatorCallbackzThe SentenceTransformers.fit method always ran the evaluator on every epoch,
    in addition to every "evaluation_steps". This callback is responsible for that.

    The `.trainer` must be provided after the trainer has been created.
    r&   r   r*   r+   c                r    t                                                       || _        d| _        d | _        d S )Neval)r.   r/   r&   metric_key_prefixtrainer)r1   r&   r2   s     r3   r/   zEvaluatorCallback.__init__g   s3    "!'r4   r;   r   r<   r   r=   r   r@   r!   c                   |                      ||j                  }t          |t                    sd|i}t	          |                                          D ]A}|                    | j         d          s"|                    |          || j         d| <   B| j	        %| j	        j
                            ||||           d S d S )N)epochr&   _)r>   )r&   r\   
isinstancedictlistkeys
startswithrY   poprZ   callback_handlerrK   )r1   r;   r<   r=   r@   rG   evaluator_metricsrI   s           r3   on_epoch_endzEvaluatorCallback.on_epoch_endm   s     !NN5NDD+T22 	A!,.? @ )..0011 	b 	bC>>T%;">">">?? bGXG\G\]`GaGa!T%;"C"Cc"C"CD<#L)55dE7Te5fffff $#r4   )r&   r   r*   r+   rN   )rO   rP   rQ   rR   r/   rf   rS   rT   s   @r3   rV   rV   `   sc              g g g g g g g gr4   rV   c                  ,     e Zd ZdZd fdZddZ xZS )OriginalCallbackzA Callback to invoke the original callback function that was provided to SentenceTransformer.fit()

    This callback has the following signature: `(score: float, epoch: int, steps: int) -> None`
    callback!Callable[[float, int, int], None]r&   r   r*   r+   c                d    t                                                       || _        || _        d S r-   )r.   r/   ri   r&   )r1   ri   r&   r2   s      r3   r/   zOriginalCallback.__init__   s+     "r4   r;   transformers.TrainingArgumentsr<   r   r=   r   r>   r?   c                    t          | j        dd          }|                                D ]=\  }}|                    |          r#|                     ||j        |j                  c S >d S rB   )r9   r&   rD   rE   ri   r\   global_step)	r1   r;   r<   r=   r>   rG   rH   rI   rJ   s	            r3   rK   zOriginalCallback.on_evaluate   s}     T^-={KK
!--// 	L 	LJC||J'' L}}UEK9JKKKKKL	L 	Lr4   )ri   rj   r&   r   r*   r+   )
r;   rl   r<   r   r=   r   r>   r?   r*   r+   )rO   rP   rQ   rR   r/   rK   rS   rT   s   @r3   rh   rh      sc         
# # # # # #
L L L L L L L Lr4   rh   c                      e Zd ZdZdddddej        j        ddidd	dd
dddd
ddd	fd5d*Zed6d-            Z	d7d1Z
	 dddddej        j        ddidd	dd
dddd
ddd	fd5d2Zd8d3Zd8d4ZdS )9FitMixinzEMixin class for injecting the `fit` method into Sentence TransformersNr   WarmupLineari'  lrgh㈵>g{Gz?r   TFi  train_objectives&Iterable[tuple[DataLoader, nn.Module]]r&   r   epochsint	schedulerr%   warmup_stepsoptimizer_classtype[Optimizer]optimizer_paramsdict[str, object]weight_decayr6   evaluation_stepsoutput_pathr(   r)   max_grad_normuse_ampri   rj   show_progress_barcheckpoint_pathcheckpoint_save_stepscheckpoint_save_total_limitr*   r+   c                  /0 t                      st          d          ddlm} t	          | \  }}d }|D ]	}||_        
d/t          j        }i }t          |d          D ]\  }}t          |t                    rt          j        }n6t          |d          r&t          |j        t                    rt          j        }t!          |d	/          /g }g }|D ]"}t	          d
 |D              \  }} ||z  }|| z  }#t#          j        d t          t	          |           D                       }!d}"	 t'          |          dhk    rd}"n# t(          $ r Y nw xY w|"r|!                    d|          }!|!|d| <   t-          |          }d&d}#d t          |d          D             }$d}%|+|dk    r%|dk    r|}%nt.                              d           d}t3          j        t6          j                  t3          j        d          k    rdnd}&t;          d'|p	 |#            |t<          j        //||%d|&|
|
dk    rdndi|
||| |dnd||d}'||dk    r-tA          /fd|!                                D                       }tE          ||z            }(tG          | $                                          })g d00fd |)D             |	d!0fd"|)D             d#d!g}* ||*fi |}+| %                    |+|||($          },g }-|G|-&                    tO          |                     |#|-&                    tQ          ||                      || |'|d|$||+|,f|-%          }.|.j)        j*        D ]}t          |tN                    r|.|_+        |$|.,                    t[          |||                     |..                                 dS )(a   
        Deprecated training method from before Sentence Transformers v3.0, it is recommended to use
        :class:`~sentence_transformers.trainer.SentenceTransformerTrainer` instead. This method uses
        :class:`~sentence_transformers.trainer.SentenceTransformerTrainer` behind the scenes, but does
        not provide as much flexibility as the Trainer itself.

        This training approach uses a list of DataLoaders and Loss functions to train the model. Each DataLoader
        is sampled in turn for one batch. We sample only as many batches from each DataLoader as there are in the
        smallest one to make sure of equal training with each dataset, i.e. round robin sampling.

        This method should produce equivalent results in v3.0+ as before v3.0, but if you encounter any issues
        with your existing training scripts, then you may wish to use
        :meth:`SentenceTransformer.old_fit <sentence_transformers.SentenceTransformer.old_fit>` instead.
        That uses the old training method from before v3.0.

        Args:
            train_objectives: Tuples of (DataLoader, LossFunction). Pass
                more than one for multi-task learning
            evaluator: An evaluator (sentence_transformers.evaluation)
                evaluates the model performance during training on held-
                out dev data. It is used to determine the best model
                that is saved to disc.
            epochs: Number of epochs for training
            steps_per_epoch: Number of training steps per epoch. If set
                to None (default), one epoch is equal the DataLoader
                size from train_objectives.
            scheduler: Learning rate scheduler. Available schedulers:
                constantlr, warmupconstant, warmuplinear, warmupcosine,
                warmupcosinewithhardrestarts
            warmup_steps: Behavior depends on the scheduler. For
                WarmupLinear (default), the learning rate is increased
                from o up to the maximal learning rate. After these many
                training steps, the learning rate is decreased linearly
                back to zero.
            optimizer_class: Optimizer
            optimizer_params: Optimizer parameters
            weight_decay: Weight decay for model parameters
            evaluation_steps: If > 0, evaluate the model using evaluator
                after each number of training steps
            output_path: Storage path for the model and evaluation files
            save_best_model: If true, the best model (according to
                evaluator) is stored at output_path
            max_grad_norm: Used for gradient normalization.
            use_amp: Use Automatic Mixed Precision (AMP). Only for
                Pytorch >= 1.6.0
            callback: Callback function that is invoked after each
                evaluation. It must accept the following three
                parameters in this order: `score`, `epoch`, `steps`
            show_progress_bar: If True, output a tqdm progress bar
            checkpoint_path: Folder to save checkpoints during training
            checkpoint_save_steps: Will save a checkpoint after so many
                steps
            checkpoint_save_total_limit: Total number of checkpoints to
                store
        zGPlease install `datasets` to use this function: `pip install datasets`.r   )SentenceTransformerTrainerc                    | S r-    )batchs    r3   identityzFitMixin.fit.<locals>.identity   s    Lr4      r   )startdataset
batch_sizec                *    g | ]}|j         |j        fS r   )textslabel.0examples     r3   
<listcomp>z FitMixin.fit.<locals>.<listcomp>	  s"    1f1f1fU\7='-2P1f1f1fr4   c                     i | ]\  }}d | |S )	sentence_r   )r   idxtexts      r3   
<dictcomp>z FitMixin.fit.<locals>.<dictcomp>  s'    (i(i(iYS$):S):):D(i(i(ir4   TFr   	_dataset_r*   r%   c                    d} d}t          |                                           rt          t          t          |                                                               dk    rjd| } |dz  }t          |                                           r?t          t          t          |                                                               dk    j| S )Nzcheckpoints/modelr   r   zcheckpoints/model_)r   existslenr`   iterdir)dir_namer   s     r3   _default_checkpoint_dirz-FitMixin.fit.<locals>._default_checkpoint_dir  s    *HCx..'')) c$tH~~7M7M7O7O2P2P.Q.QUV.V.V555q x..'')) c$tH~~7M7M7O7O2P2P.Q.QUV.V.V Or4   c                     i | ]\  }}d | |S )r   r   )r   r   loss_fns      r3   r   z FitMixin.fit.<locals>.<dictcomp>#  s'    ddd|sG)C))7dddr4   NzqSetting `steps_per_epoch` alongside `epochs` > 1 no longer works. We will train with the full datasets per epoch.z4.41.0eval_strategyevaluation_strategy)r$   batch_samplermulti_dataset_batch_samplerper_device_train_batch_sizeper_device_eval_batch_sizenum_train_epochs	max_stepsstepsno)
eval_stepsr   fp16disable_tqdmsave_strategy
save_stepssave_total_limitc                4    g | ]}t          |          z  S r   r   )r   train_datasetr   s     r3   r   z FitMixin.fit.<locals>.<listcomp>M  s&    "u"u"u3}#5#5#C"u"u"ur4   biaszLayerNorm.biaszLayerNorm.weightc                R    g | ]"\  }t          fd D                        |#S )c              3      K   | ]}|v V  	d S r-   r   r   ndns     r3   	<genexpr>z*FitMixin.fit.<locals>.<listcomp>.<genexpr>V  s(      C_C_PRB!GC_C_C_C_C_C_r4   anyr   pr   no_decays     @r3   r   z FitMixin.fit.<locals>.<listcomp>V  sA    ```AC_C_C_C_V^C_C_C_@_@_`1```r4   paramsr}   c                R    g | ]"\  }t          fd D                        |#S )c              3      K   | ]}|v V  	d S r-   r   r   s     r3   r   z*FitMixin.fit.<locals>.<listcomp>.<genexpr>Y  s'      <X<XR1W<X<X<X<X<X<Xr4   r   r   s     @r3   r   z FitMixin.fit.<locals>.<listcomp>Y  s@    YYYda<X<X<X<Xx<X<X<X9X9XYYYYr4           rw   rx   t_total)r@   r;   r   eval_datasetlossr&   
optimizers	callbacks)r*   r%   r   )/r   ImportErrorsentence_transformers.trainerr   zip
collate_fnr   BATCH_SAMPLER	enumerater^   r   NO_DUPLICATEShasattrr   r   GROUP_BY_LABELr9   r   	from_dictset	TypeError
add_columnr   loggerwarningr	   parsetransformers__version__r   r   ROUND_ROBINminvaluesrv   r`   named_parameters_get_schedulerappendrV   rh   rd   r   rZ   add_callbackr#   train)1r1   rs   r&   ru   steps_per_epochrw   rx   ry   r{   r}   r~   r   r(   r   r   ri   r   r   r   r   r   data_loadersloss_fnsr   data_loaderr   train_dataset_dict
loader_idxr   labelsr   batch_textsbatch_labelsr   add_label_columnr   loss_fn_dictr   eval_strategy_keyr;   num_train_stepsparam_optimizeroptimizer_grouped_parameters	optimizerscheduler_objr   rZ   r   r   s1                                                  @@r3   fitzFitMixin.fit   sX   Z %&& 	ighhh 	MLLLLL!$&6!7h	 	 	 ( 	. 	.K%-K""
%3  '0Q'G'G'G 	C 	C#J+'=>> = - ;i00 =Z@SUi5j5j = - < lJGGJEF$ ' ',/1f1f`e1f1f1f,g)\$,&'(i(iR[\_af\gRhRh(i(i(ijjG#v;;1#%%',$    >!,,Wf==;B7:7788();<<	 	 	 	 edyQYabGcGcGcddd 	&?Q+>+>{{+		F   #'
 }\566'-:Q:QQQ O& 	
 4 
&C*A*A*C*C'(A(M(2'1#
 
 ".>.JO_bcOcOc77im
 ('..%4%@''d,8%
 
 
 
* "o&:&:!"u"u"u"uYkYrYrYtYt"u"u"uvvOo677 t446677AAA a`````` ,  ZYYYoYYYknoo(
$ $O$@UUDTUU	++ , 
 

 	 .y99:::#  !1(I!F!FGGG,,,!=1	
 	
 	
  0: 	+ 	+H($566 +#* "  !2;	?![![\\\s   /E
EEr   r   c                h   |                                 }|dk    rt          j        |           S |dk    rt          j        | |          S |dk    rt          j        | ||          S |dk    rt          j        | ||          S |dk    rt          j        | ||          S t          d|           )	z
        Returns the correct learning rate scheduler. Available scheduler:

        - constantlr,
        - warmupconstant,
        - warmuplinear,
        - warmupcosine,
        - warmupcosinewithhardrestarts
        
constantlrwarmupconstant)num_warmup_stepswarmuplinear)r   num_training_stepswarmupcosinewarmupcosinewithhardrestartszUnknown scheduler )lowerr   get_constant_schedule!get_constant_schedule_with_warmupget_linear_schedule_with_warmupget_cosine_schedule_with_warmup2get_cosine_with_hard_restarts_schedule_with_warmup
ValueError)r   rw   rx   r   s       r3   r   zFitMixin._get_scheduler|  s     OO%%	$$5i@@@***A)^jkkkk.((?LW    .((?LW    888RLW    =)==>>>r4   r   list[InputExample]&tuple[list[dict[str, Tensor]], Tensor]c                     d |D             } fdt          | D             }d |D             }|rGt          |d         t          j                  r't	          j        t          j        |                    }nt	          j        |          }||fS )a;  
        Transforms a batch from a SmartBatchingDataset to a batch of tensors for the model
        Here, batch is a list of InputExample instances: [InputExample(...), ...]

        Args:
            batch: a batch from a SmartBatchingDataset

        Returns:
            a batch of tensors for the model
        c                    g | ]	}|j         
S r   )r   r   s     r3   r   z3FitMixin.smart_batching_collate.<locals>.<listcomp>  s    4447444r4   c                :    g | ]}                     |          S r   )tokenize)r   sentencer1   s     r3   r   z3FitMixin.smart_batching_collate.<locals>.<listcomp>  s%    QQQT]]844QQQr4   c                    g | ]	}|j         
S r   )r   r   s     r3   r   z3FitMixin.smart_batching_collate.<locals>.<listcomp>  s    555G'-555r4   r   )r   r^   npndarraytorch
from_numpystacktensor)r1   r   r   sentence_featuresr   labels_tensors   `     r3   smart_batching_collatezFitMixin.smart_batching_collate  s     54e444QQQQS%[QQQ55u555  	1jBJ77 	1!,RXf-=-=>>MM!L00M -//r4   c                   2 g }|D ]-\  }}|                     t          j        ||                     .d                    d |D                       }t	          j        t          |          ||||t          |          ||	|
|d
dd          }d _        t          j	        
                    d|          
                    d	|           j        d
<   |r)ddlm} t          j        j                                        }                      j                   d |D             }|D ]} j        |_        d |D             }|D ]}|                     j                   d _        ||dk    rt/          d |D                       }t1          ||z            }g }g }|D ]}t3          |                                          } g d22fd| D             |	d2fd| D             ddg}! ||!fi |}"                     |"|||          }#|                    |"           |                    |#           d}$d |D             }%t;          |          }&d}'t=          |d|           D ]:}(d})|D ]*}|                                 |                                  +t=          |dd|           D ]}*tC          |&          D ]:}+||+         }||+         }"||+         }|%|+         },	 tE          |,          }-n9# tF          $ r, tI          ||+                   },|,|%|+<   tE          |,          }-Y nw xY w|-\  }.}/|/                     j                  }/t3          tK           fd|.                    }.|r |            5   ||.|/          }0ddd           n# 1 swxY w Y   |&                                }1|'                    |0          (                                 |)                    |"           t          j*        j+        ,                    |-                                |           |.                    |"           |/                                 |&                                |1k    }'nk ||.|/          }0|0(                                 t          j*        j+        ,                    |-                                |           |".                                 |"                                 |'s|.                                 <|)dz  })|$dz  }$|
dk    rP|)|
z  dk    rG 0                    ||||(|)|           |D ]*}|                                 |                                  +|(|&|dk    r |$|z  dk    r 1                    |||$           ډ 0                    ||||(d |           <|| 2                    |           | 1                    |||$           dS dS )!ak  
        Deprecated training method from before Sentence Transformers v3.0, it is recommended to use
        :class:`sentence_transformers.trainer.SentenceTransformerTrainer` instead. This method should
        only be used if you encounter issues with your existing training scripts after upgrading to v3.0+.

        This training approach uses a list of DataLoaders and Loss functions to train the model. Each DataLoader
        is sampled in turn for one batch. We sample only as many batches from each DataLoader as there are in the
        smallest one to make sure of equal training with each dataset, i.e. round robin sampling.

        Args:
            train_objectives: Tuples of (DataLoader, LossFunction). Pass
                more than one for multi-task learning
            evaluator: An evaluator (sentence_transformers.evaluation)
                evaluates the model performance during training on held-
                out dev data. It is used to determine the best model
                that is saved to disc.
            epochs: Number of epochs for training
            steps_per_epoch: Number of training steps per epoch. If set
                to None (default), one epoch is equal the DataLoader
                size from train_objectives.
            scheduler: Learning rate scheduler. Available schedulers:
                constantlr, warmupconstant, warmuplinear, warmupcosine,
                warmupcosinewithhardrestarts
            warmup_steps: Behavior depends on the scheduler. For
                WarmupLinear (default), the learning rate is increased
                from o up to the maximal learning rate. After these many
                training steps, the learning rate is decreased linearly
                back to zero.
            optimizer_class: Optimizer
            optimizer_params: Optimizer parameters
            weight_decay: Weight decay for model parameters
            evaluation_steps: If > 0, evaluate the model using evaluator
                after each number of training steps
            output_path: Storage path for the model and evaluation files
            save_best_model: If true, the best model (according to
                evaluator) is stored at output_path
            max_grad_norm: Used for gradient normalization.
            use_amp: Use Automatic Mixed Precision (AMP). Only for
                Pytorch >= 1.6.0
            callback: Callback function that is invoked after each
                evaluation. It must accept the following three
                parameters in this order: `score`, `epoch`, `steps`
            show_progress_bar: If True, output a tqdm progress bar
            checkpoint_path: Folder to save checkpoints during training
            checkpoint_save_steps: Will save a checkpoint after so many
                steps
            checkpoint_save_total_limit: Total number of checkpoints to
                store
        z

c                    g | ]}|S r   r   )r   r   s     r3   r   z$FitMixin.old_fit.<locals>.<listcomp>  s    *P*P*PD4*P*P*Pr4   )
r&   ru   r   rw   rx   ry   r{   r}   r~   r      T)indent	sort_keysNz{LOSS_FUNCTIONS}z{FIT_PARAMETERS}z{TRAINING_SECTION}r   )autocastc                    g | ]\  }}|S r   r   )r   
dataloaderr]   s      r3   r   z$FitMixin.old_fit.<locals>.<listcomp>"  s    HHHmj!zHHHr4   c                    g | ]\  }}|S r   r   )r   r]   r   s      r3   r   z$FitMixin.old_fit.<locals>.<listcomp>(  s    <<<4t<<<r4   iigc                ,    g | ]}t          |          S r   r   r   r$  s     r3   r   z$FitMixin.old_fit.<locals>.<listcomp>/  s    "Q"Q"Qz3z??"Q"Q"Qr4   r   c                R    g | ]"\  }t          fd D                        |#S )c              3      K   | ]}|v V  	d S r-   r   r   s     r3   r   z.FitMixin.old_fit.<locals>.<listcomp>.<genexpr><  s(      GcGcTVaGcGcGcGcGcGcr4   r   r   s     @r3   r   z$FitMixin.old_fit.<locals>.<listcomp><  sA    dddTQCGcGcGcGcZbGcGcGcDcDcdqdddr4   r   c                R    g | ]"\  }t          fd D                        |#S )c              3      K   | ]}|v V  	d S r-   r   r   s     r3   r   z.FitMixin.old_fit.<locals>.<listcomp>.<genexpr>?  s'      @\@\Rq@\@\@\@\@\@\r4   r   r   s     @r3   r   z$FitMixin.old_fit.<locals>.<listcomp>?  sA    ]]]$!QS@\@\@\@\S[@\@\@\=\=\]A]]]r4   r   r   c                ,    g | ]}t          |          S r   )iterr'  s     r3   r   z$FitMixin.old_fit.<locals>.<listcomp>K  s     IIIz$z**IIIr4   FEpoch)descdisable	Iterationg?)r/  	smoothingr0  c                .    t          | j                  S r-   )r   device)r   r1   s    r3   <lambda>z"FitMixin.old_fit.<locals>.<lambda>g  s    oeT[6Y6Y r4   r   r   )3extendr   get_train_objective_infojoinjsondumpsr   r%   _model_card_text__TRAINING_SECTION__replace_model_card_varstorch.cuda.ampr"  r  cudaamp
GradScalertor4  r  r   
best_scorer   rv   r`   r   r   r   r   r   	zero_gradr   rangenextStopIterationr-  map	get_scalescalebackwardunscale_r   utilsclip_grad_norm_
parametersstepupdate_eval_during_training_save_checkpointrF   )3r1   rs   r&   ru   r   rw   rx   ry   r{   r}   r~   r   r(   r   r   ri   r   r   r   r   info_loss_functionsr$  r   info_fit_parametersr"  scalerdataloadersloss_models
loss_modelr   r   
schedulersr   r   r   r   rn   data_iteratorsnum_train_objectivesskip_schedulerr\   training_stepsr]   	train_idxdata_iteratordatafeaturesr   
loss_valuescale_before_stepr   s3   `                                                 @r3   old_fitzFitMixin.old_fit  s   T ! 0 	e 	eJ&&'8'QR\^b'c'cdddd$kk*P*P<O*P*P*PQQ"j%i00 #2& ,#&#7#7$4 ,$4!.  
 
 
  !%6G6\6d6d 37
 7

'$&9
:
: 	23  	1//////Z^..00FHH7GHHH & 	@ 	@J$($?J!!<<+;<<<% 	' 	'JMM$+&&&&""o&:&:!"Q"Q["Q"Q"QRROo677 

% 	- 	-J":#>#>#@#@AAOEEEH eddd_ddd$0  ^]]]/]]]orss,( ((DYYHXYYI //Y\Sb 0  M i(((m,,,,II[III"#344F>O:OPPP D	e D	eEN) # #
$$&&&  """"O+[lWlmmm ;e ;e!&';!<!< &) &)I!,Y!7J *9 5I *9 5I$29$=M3#M22( 3 3 3(,[-C(D(D4Ay1#M223
 (,$Hf#YYt{33F#C(Y(Y(Y(Y[c$d$deeH )%XZZ F F)3Hf)E)EJF F F F F F F F F F F F F F F -3,<,<,>,>)Z0099;;;	22266z7L7L7N7NP]^^^I...)/)9)9););?P)P%/Z&%A%A
"++---66z7L7L7N7NP]^^^!((('')))) )!(((!#q #a''N=M,MQR,R,R..!;X`   '2 + +
",,..."((**** $/-9-11#&;;q@@))/;VXcddd&&y+PUWY[cdddd!8IIk"""&!!/3NP[\\\\\ '&s$   $K443L*	)L*	8N	NNc                D   |}|Lt          j        |d           t           j                            |d          }t          j        |d           |I || |||          }| ||||           || j        k    r"|| _        |r|                     |           dS dS dS dS )z#Runs evaluation during the trainingNT)exist_okrX   )r   r\   r   )osmakedirspathr8  rD  rF   )	r1   r&   r   r(   r\   r   ri   	eval_pathscores	            r3   rS  zFitMixin._eval_during_training  s    	"Kd3333[&99IK	D1111 Id	eTTTE#u---t&&"'" +IIk***** !  '&+ +r4   c                   |                      t          j                            |t	          |                               ||dk    rg }t          j        |          D ]Y}|                                rC|                    t          |          t          j                            ||          d           Zt          |          |k    r8t          |d           }t          j        |d         d                    d S d S d S d S )Nr   )rQ  rk  c                    | d         S )NrQ  r   )xs    r3   r5  z+FitMixin._save_checkpoint.<locals>.<lambda>  s
    &	 r4   )rI   rk  )rF   ri  rk  r8  r%   listdirisdigitr   rv   r   sortedshutilrmtree)r1   r   r   rQ  old_checkpointssubdirs         r3   rT  zFitMixin._save_checkpoint  s   		"',,D		::;;; '27RUV7V7V O*_55 q q>>## q#**CKKVegmInIn+o+oppp?##&AAA"(>Q>Q"R"R"Roa0899999 327V7V BAr4   )&rs   rt   r&   r   ru   rv   rw   r%   rx   rv   ry   rz   r{   r|   r}   r6   r~   rv   r   r%   r(   r)   r   r6   r   r)   ri   rj   r   r)   r   r%   r   rv   r   rv   r*   r+   )rw   r%   rx   rv   r   rv   r*   r   )r   r  r*   r  )r*   r+   )rO   rP   rQ   rR   r  optimAdamWr   staticmethodr   r  rf  rS  rT  r   r4   r3   rp   rp      s@       OO
 (,'!+0;+</3Tl" ! $ 6:"&#%(+,)\ \ \ \ \| ? ? ? \?<0 0 0 00 (,'!+0;+</3Tl" ! $ 6:"&#%(+,)c] c] c] c] c]J+ + + +": : : : : :r4   rp   )?
__future__r   r9  loggingri  rt  collections.abcr   pathlibr   typingr   r   r   numpyr  r  r   	packagingr	   r
   r   torch.optimr   torch.optim.lr_schedulerr   torch.utils.datar   tqdm.autonotebookr   r   r   r   5sentence_transformers.datasets.NoDuplicatesDataLoaderr   3sentence_transformers.datasets.SentenceLabelDatasetr   #sentence_transformers.training_argsr   r   r   sentence_transformers.utilr   r   r   
evaluationr   model_card_templatesr   datasetsr   r   	getLoggerrO   r   *sentence_transformers.readers.InputExampler    )sentence_transformers.SentenceTransformerr!   r#   rV   rh   rp   r   r4   r3   <module>r     s   " " " " " "   				  $ $ $ $ $ $       / / / / / / / / / /                        ! ! ! ! ! ! - - - - - - ' ' ' ' ' ' $ $ $ $ $ $ F F F F F F F F F F X X X X X X T T T T T T         
 X W W W W W W W W W ) ) ) ) ) ) 3 3 3 3 3 3 .--------		8	$	$ NGGGGGGMMMMMM1( 1( 1( 1( 1( 1( 1( 1(hg g g g g g g gDL L L L L L L L2_: _: _: _: _: _: _: _: _: _:r4   