
from __future__ import annotations

from collections.abc import Iterable
from typing import Any

import torch
from torch import Tensor, nn

from sentence_transformers import util
from sentence_transformers.SentenceTransformer import SentenceTransformer


class MultipleNegativesRankingLoss(nn.Module):
    def __init__(self, model: SentenceTransformer, scale: float = 20.0, similarity_fct=util.cos_sim) -> None:
        """
        This loss expects as input a batch consisting of sentence pairs ``(a_1, p_1), (a_2, p_2)..., (a_n, p_n)``
        where we assume that ``(a_i, p_i)`` are a positive pair and ``(a_i, p_j)`` for ``i != j`` a negative pair.

        For each ``a_i``, it uses all other ``p_j`` as negative samples, i.e., for ``a_i``, we have 1 positive example
        (``p_i``) and ``n-1`` negative examples (``p_j``). It then minimizes the negative log-likelihood of the
        softmax-normalized scores.
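
        Conceptually, each batch reduces to a cross-entropy problem over an in-batch similarity matrix whose
        diagonal holds the positive pairs. A minimal, illustrative sketch (random tensors stand in for real
        sentence embeddings; the batch size, dimension and scale value are arbitrary)::

                import torch
                import torch.nn.functional as F
                from sentence_transformers import util

                anchors = torch.randn(8, 256)     # a_1 ... a_8
                positives = torch.randn(8, 256)   # p_1 ... p_8

                scores = util.cos_sim(anchors, positives) * 20.0  # (8, 8): scores[i, j] = sim(a_i, p_j)
                labels = torch.arange(8)                          # the positive for a_i sits in column i
                loss = F.cross_entropy(scores, labels)            # softmax + negative log-likelihood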

        This loss function works well for training embeddings in retrieval setups where you have positive pairs
        (e.g. (query, relevant_doc)), as each batch supplies ``n-1`` effectively random negative docs per anchor.

        The performance usually increases with increasing batch sizes.

        You can also provide one or multiple hard negatives per anchor-positive pair by structuring the data like this:
        ``(a_1, p_1, n_1), (a_2, p_2, n_2)``. Here, ``n_1`` is a hard negative for ``(a_1, p_1)``. For each pair
        ``(a_i, p_i)``, the loss then uses all ``p_j`` with ``j != i`` as well as all ``n_j`` as negatives.
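
        With hard negatives, the candidate pool simply grows: all positives and all hard negatives are scored
        against every anchor, and the target column for ``a_i`` is still ``i``. Continuing the sketch above
        (again illustrative only)::

                negatives = torch.randn(8, 256)   # n_1 ... n_8 (hard negatives)

                candidates = torch.cat([positives, negatives])     # (16, 256)
                scores = util.cos_sim(anchors, candidates) * 20.0  # (8, 16)
                loss = F.cross_entropy(scores, torch.arange(8))    # p_i is still column i for a_i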

        Args:
            model: SentenceTransformer model
            scale: Output of similarity function is multiplied by scale
                value
            similarity_fct: similarity function between sentence
                embeddings. By default, cos_sim. Can also be set to dot
                product (and then set scale to 1)

        References:
            - Efficient Natural Language Response Suggestion for Smart Reply, Section 4.4: https://arxiv.org/pdf/1705.00652.pdf
            - `Training Examples > Natural Language Inference <../../examples/training/nli/README.html>`_
            - `Training Examples > Paraphrase Data <../../examples/training/paraphrases/README.html>`_
            - `Training Examples > Quora Duplicate Questions <../../examples/training/quora_duplicate_questions/README.html>`_
            - `Training Examples > MS MARCO <../../examples/training/ms_marco/README.html>`_
            - `Unsupervised Learning > SimCSE <../../examples/unsupervised_learning/SimCSE/README.html>`_
            - `Unsupervised Learning > GenQ <../../examples/unsupervised_learning/query_generation/README.html>`_

        Requirements:
            1. (anchor, positive) pairs or (anchor, positive, negative) triplets

        Inputs:
            +-------------------------------------------------+--------+
            | Texts                                           | Labels |
            +=================================================+========+
            | (anchor, positive) pairs                        | none   |
            +-------------------------------------------------+--------+
            | (anchor, positive, negative) triplets           | none   |
            +-------------------------------------------------+--------+
            | (anchor, positive, negative_1, ..., negative_n) | none   |
            +-------------------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Relations:
            - :class:`CachedMultipleNegativesRankingLoss` is equivalent to this loss, but it uses caching that allows for
              much higher batch sizes (and thus better performance) without extra memory usage. However, it is slightly
              slower.
            - :class:`MultipleNegativesSymmetricRankingLoss` is equivalent to this loss, but with an additional loss term.
            - :class:`GISTEmbedLoss` is equivalent to this loss, but uses a guide model to guide the in-batch negative
              sample selection. `GISTEmbedLoss` yields a stronger training signal at the cost of some training overhead.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                })
                loss = losses.MultipleNegativesRankingLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
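
            A variant with explicit hard negatives, combined with the ``BatchSamplers.NO_DUPLICATES`` batch
            sampler recommended above, might look as follows (a sketch reusing ``model`` and ``loss`` from
            the example above; the negative texts and training arguments are illustrative)::

                from sentence_transformers import SentenceTransformerTrainingArguments
                from sentence_transformers.training_args import BatchSamplers

                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                    "negative": ["It's pouring down rain.", "She walked home."],
                })
                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    batch_sampler=BatchSamplers.NO_DUPLICATES,  # avoid duplicate in-batch negatives
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()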
        """
        super().__init__()
        self.model = model
        self.scale = scale
        self.similarity_fct = similarity_fct
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        # Embed every input column: reps[0] holds the anchors, reps[1:] the positives and any hard negatives
        reps = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
        embeddings_a = reps[0]
        embeddings_b = torch.cat(reps[1:])

        # Score each anchor against all candidates; the scale sharpens the softmax distribution
        scores = self.similarity_fct(embeddings_a, embeddings_b) * self.scale
        # The positive for anchor i is candidate i, so the targets are simply 0 .. batch_size - 1
        range_labels = torch.arange(0, scores.size(0), device=scores.device)

        return self.cross_entropy_loss(scores, range_labels)

    def get_config_dict(self) -> dict[str, Any]:
        return {"scale": self.scale, "similarity_fct": self.similarity_fct.__name__}

    @property
    def citation(self) -> str:
        return """
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
"""