
    Ngn0                        d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZmZ d dlmZmZ d d	lmZ d d
lmZ ddZ G d dej                  ZdS )    )annotations)IterableIterator)nullcontext)partial)AnyN)Tensornn)SentenceTransformerutil)RandContext)StaticEmbeddinggrad_outputr	   sentence_featuresIterable[dict[str, Tensor]]loss_obj+CachedMultipleNegativesSymmetricRankingLossreturnNonec           
        |j         J |j        J t          j                    5  t	          ||j         |j                  D ]\  }}}t	          |                    |dd|          |          D ]X\  \  }}}t          j        |                                |                                          | z  }	|	                                 Y	 ddd           dS # 1 swxY w Y   dS )zOA backward hook to backpropagate the cached gradients mini-batch by mini-batch.NTF)sentence_feature	with_gradcopy_random_staterandom_states)	cacher   torchenable_gradzipembed_minibatch_iterdotflattenbackward)
r   r   r   r   gradr   reps_mb_grad_mb	surrogates
             /var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/losses/CachedMultipleNegativesSymmetricRankingLoss.py_backward_hookr)      sT    >%%%!---				 % %589JHN\d\r5s5s 	% 	%1dM),--%5"&+"/	 .   * * 
% 
%%!g "Igoo&7&79J9JKKkY	""$$$$
%	%% % % % % % % % % % % % % % % % % %s   B#CCCc                  l     e Zd Zdej        ddfd+ fdZ	 d,d-dZ	 d,d.dZd/d#Zd/d$Z	d0d(Z
d1d*Z xZS )2r   g      4@    Fmodelr   scalefloatsimilarity_fct"callable[[Tensor, Tensor], Tensor]mini_batch_sizeintshow_progress_barboolr   r   c                .   t                                                       t          |d         t                    rt	          d          || _        || _        || _        t          j	                    | _
        || _        d| _        d| _        || _        dS )a  
        Boosted version of :class:`MultipleNegativesSymmetricRankingLoss` (MNSRL) by GradCache (https://arxiv.org/pdf/2101.06983.pdf).

        Given a list of (anchor, positive) pairs, MNSRL sums the following two losses:

        1. Forward loss: Given an anchor, find the sample with the highest similarity out of all positives in the batch.
        2. Backward loss: Given a positive, find the sample with the highest similarity out of all anchors in the batch.

        For example with question-answer pairs, the forward loss finds the answer for a given question and the backward loss
        finds the question for a given answer. This loss is common in symmetric tasks, such as semantic textual similarity.

        The caching modification allows for large batch sizes (which give a better training signal) with constant memory usage,
        allowing you to reach optimal training signal with regular hardware.

        Note: If you pass triplets, the negative entry will be ignored. An anchor is just searched for the positive.

        Args:
            model: SentenceTransformer model
            scale: Output of similarity function is multiplied by scale value
            similarity_fct: similarity function between sentence embeddings. By default, cos_sim.
                Can also be set to dot product (and then set scale to 1)
            mini_batch_size: Mini-batch size for the forward pass, this denotes how much memory is actually used during
                training and evaluation. The larger the mini-batch size, the more memory efficient the training is, but
                the slower the training will be.
            show_progress_bar: If True, shows progress bar during processing

        Requirements:
            1. (anchor, positive) pairs
            2. Should be used with large batch sizes for superior performance, but has slower training time than non-cached versions

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Relations:
            - Like :class:`MultipleNegativesRankingLoss`, but with an additional symmetric loss term and caching mechanism.
            - Inspired by :class:`CachedMultipleNegativesRankingLoss`, adapted for symmetric loss calculation.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                })
                loss = losses.CachedMultipleNegativesSymmetricRankingLoss(model, mini_batch_size=32)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        References:
            - Efficient Natural Language Response Suggestion for Smart Reply, Section 4.4: https://arxiv.org/pdf/1705.00652.pdf
            - Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup: https://arxiv.org/pdf/2101.06983.pdf
        r   zCachedMultipleNegativesSymmetricRankingLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding. Consider using MultipleNegativesSymmetricRankingLoss instead.N)super__init__
isinstancer   
ValueErrorr,   r-   r/   r
   CrossEntropyLosscross_entropy_lossr1   r   r   r3   )selfr,   r-   r/   r1   r3   	__class__s         r(   r7   z4CachedMultipleNegativesSymmetricRankingLoss.__init__)   s    Z 	eAh00 	P  
 

,"$"5"7"7.04
=A!2    Nr   dict[str, Tensor]beginendr   r   random_stateRandContext | None!tuple[Tensor, RandContext | None]c                   |rt           nt          j        }|t                      n|}fd|                                D             }	|5   |            5  |rt	          |	                                 nd}|                     |	          d         }
ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |
|fS )z Embed a mini-batch of sentences.Nc                ,    i | ]\  }}||         S  rG   ).0kvr@   rA   s      r(   
<dictcomp>zOCachedMultipleNegativesSymmetricRankingLoss.embed_minibatch.<locals>.<dictcomp>   s'    %[%[%[$!Qa59%[%[%[r>   sentence_embedding)r   r   no_graditemsr   valuesr,   )r<   r   r@   rA   r   r   rB   grad_contextrandom_state_contextsentence_feature_minibatchrepss     ``       r(   embed_minibatchz;CachedMultipleNegativesSymmetricRankingLoss.embed_minibatch   sx    '0B{{U]0<0D{}}},%[%[%[%[%[BRBXBXBZBZ%[%[%["! 	T 	T T TTeo{,F,M,M,O,OPPkozz"<==>RST T T T T T T T T T T T T T T	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T \!!s6   B6;BB6B#	#B6&B#	'B66B:=B:r   list[RandContext] | None+Iterator[tuple[Tensor, RandContext | None]]c           
   #    K   |d         }|j         \  }}t          t          j        d|| j        d| j                             D ]=\  }}	|	| j        z   }
|                     ||	|
|||dn||                   \  }}||fV  >dS )z5Iterate over mini-batches of sentences for embedding.	input_idsr   zEmbed mini-batchesdescdisableN)r   r@   rA   r   r   rB   )shape	enumeratetqdmtranger1   r3   rT   )r<   r   r   r   r   rX   bszr%   iberS   rB   s                r(   r   z@CachedMultipleNegativesSymmetricRankingLoss.embed_minibatch_iter   s       -[9	QK$) 22  
 
 	% 	%DAq D((A!%!5!5!1#"3%2%:TTa@P "6 " "D, $$$$$%	% 	%r>   rS   list[list[Tensor]]r	   c           	        t          j        |d                   }t          j        d |dd         D                       }t          |          }t          j        ||j                  }g }t          j        d|| j        d| j                   D ]}t          || j        z   |          }| 
                    |||         |          | j        z  }	|                     |	|||                   }
|	dd||f         }|                     |                                |dt          |                             }|
|z   dz  }|                                 |                    |                                           t#          |          t          |          z  }|                                }d	 |D             | _        |S )
z1Calculate the symmetric loss and cache gradients.r   c                6    g | ]}t          j        |          S rG   r   catrH   rs     r(   
<listcomp>zbCachedMultipleNegativesSymmetricRankingLoss.calculate_loss_and_cache_gradients.<locals>.<listcomp>        !A!A!A1%)A,,!A!A!Ar>      NdevicezPreparing cachesrY      c                &    g | ]}d  |D             S )c                    g | ]	}|j         
S rG   )r#   ri   s     r(   rk   zmCachedMultipleNegativesSymmetricRankingLoss.calculate_loss_and_cache_gradients.<locals>.<listcomp>.<listcomp>   s    ***!qv***r>   rG   )rH   rss     r(   rk   zbCachedMultipleNegativesSymmetricRankingLoss.calculate_loss_and_cache_gradients.<locals>.<listcomp>   s'    :::r**r***:::r>   )r   rh   lenarangero   r^   r_   r1   r3   minr/   r-   r;   tr"   appenddetachsumrequires_grad_r   r<   rS   embeddings_aembeddings_b
batch_sizelabelslossesrb   rc   scoresforward_losspositive_scoresbackward_lossloss_mbatchlosss                  r(   "calculate_loss_and_cache_gradientszNCachedMultipleNegativesSymmetricRankingLoss.calculate_loss_and_cache_gradients   s   ya))y!A!AQRR!A!A!ABB&&
j1DEEE%' #..
 
 
 	0 	0A A,,j99A!00ac1BLQQTXT^^F)-)@)@PQRSPS)U)UL$QQQ!VnO*.*A*A/BSBSBUBUW]^t`cds`t`t^tWu*v*vM'-71<K  """MM+,,..////6{{S[[(""$$::T:::
r>   c           	        t          j        |d                   }t          j        d |dd         D                       }t          |          }t          j        ||j                  }g }t          j        d|| j        d| j                   D ]}t          || j        z   |          }| 
                    |||         |          | j        z  }	|                     |	|||                   }
|	dd||f         }|                     |                                |dt          |                             }|
|z   dz  }|                    |           t          |          t          |          z  }|S )	zHCalculate the symmetric loss without caching gradients (for evaluation).r   c                6    g | ]}t          j        |          S rG   rg   ri   s     r(   rk   zNCachedMultipleNegativesSymmetricRankingLoss.calculate_loss.<locals>.<listcomp>   rl   r>   rm   Nrn   zCalculating lossrY   rp   )r   rh   rt   ru   ro   r^   r_   r1   r3   rv   r/   r-   r;   rw   rx   rz   r|   s                  r(   calculate_lossz:CachedMultipleNegativesSymmetricRankingLoss.calculate_loss   s   ya))y!A!AQRR!A!A!ABB&&
j1DEEE%' #..
 
 
 	' 	'A A,,j99A!00ac1BLQQTXT^^F)-)@)@PQRSPS)U)UL$QQQ!VnO*.*A*A/BSBSBUBUW]^t`cds`t`t^tWu*v*vM'-71<KMM+&&&&6{{S[[(r>   r   r   r   c                0   g }g | _         |D ]}g }g }|                     |dd          D ]S\  }}|                    |                                                                           |                    |           T|                    |           | j                             |           t          j                    r@|                     |          }	|	                    t          t          ||                      n|                     |          }	|	S )z"Forward pass of the loss function.FT)r   r   r   )r   r   )r   r   rx   ry   r{   r   is_grad_enabledr   register_hookr   r)   r   )
r<   r   r   rS   r   reps_mbsrandom_state_mbsr$   rB   r   s
             r(   forwardz3CachedMultipleNegativesSymmetricRankingLoss.forward   s/    1 	8 	8H!)-)B)B!1"& *C * * 6 6%
  0 0 ? ? A ABBB ''5555KK!!!%%&67777 "" 	-::4@@Dw~IZeijjjkkkk&&t,,Dr>   dict[str, Any]c                6    | j         | j        j        | j        dS )z+Get the configuration of the loss function.)r-   r/   r1   )r-   r/   __name__r1   )r<   s    r(   get_config_dictz;CachedMultipleNegativesSymmetricRankingLoss.get_config_dict  s'     Z"1:#3
 
 	
r>   )r,   r   r-   r.   r/   r0   r1   r2   r3   r4   r   r   )N)r   r?   r@   r2   rA   r2   r   r4   r   r4   rB   rC   r   rD   )
r   r?   r   r4   r   r4   r   rU   r   rV   )rS   rd   r   r	   )r   r   r   r	   r   r	   )r   r   )r   
__module____qualname__r   cos_simr7   rT   r   r   r   r   r   __classcell__)r=   s   @r(   r   r   (   s         =A\!"'[3 [3 [3 [3 [3 [3 [3H ,0" " " " "0 37% % % % %<       D   :   2
 
 
 
 
 
 
 
r>   )r   r	   r   r   r   r   r   r   )
__future__r   collections.abcr   r   
contextlibr   	functoolsr   typingr   r   r^   r	   r
   sentence_transformersr   r   ?sentence_transformers.losses.CachedMultipleNegativesRankingLossr   sentence_transformers.modelsr   r)   Moduler   rG   r>   r(   <module>r      s   " " " " " " . . . . . . . . " " " " " "                       ; ; ; ; ; ; ; ; W W W W W W 8 8 8 8 8 8% % % %.m
 m
 m
 m
 m
") m
 m
 m
 m
 m
r>   