from __future__ import annotations

from collections.abc import Iterable

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util


class MegaBatchMarginLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        positive_margin: float = 0.8,
        negative_margin: float = 0.3,
        use_mini_batched_version: bool = True,
        mini_batch_size: int = 50,
    ) -> None:
        """
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.
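
        For illustration, the hard-negative selection is roughly equivalent to the following sketch,
        where ``anchor_embeddings`` and ``positive_embeddings`` are placeholders for the encoded batch::

            scores = util.cos_sim(anchor_embeddings, positive_embeddings)  # (batch, batch) similarity matrix
            scores.fill_diagonal_(-float("inf"))  # never select positive_i as the negative for anchor_i
            hardest_negative_ids = scores.argmax(dim=1)  # j != i with maximal cos_sim(anchor_i, positive_j)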

        Args:
            model: SentenceTransformer model
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor of the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.
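
              For instance, with the trainer API (assuming the ``batch_sampler`` option of
              :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments`)::

                  from sentence_transformers.training_args import BatchSamplers

                  args = SentenceTransformerTrainingArguments(
                      output_dir="output",
                      per_device_train_batch_size=250,
                      batch_sampler=BatchSamplers.NO_DUPLICATES,
                  )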

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
                from datasets import Dataset

                train_batch_size = 250
                train_mini_batch_size = 50

                model = SentenceTransformer('all-MiniLM-L6-v2')
                train_dataset = Dataset.from_dict({
                    "anchor": [f"This is sentence number {i}" for i in range(500)],
                    "positive": [f"This is sentence number {i}" for i in range(1, 501)],
                })
                loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    per_device_train_batch_size=train_batch_size,
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        """
        super().__init__()
        self.model = model
        self.positive_margin = positive_margin
        self.negative_margin = negative_margin
        self.mini_batch_size = mini_batch_size
        self.forward = self.forward_mini_batched if use_mini_batched_version else self.forward_non_mini_batched

    def forward_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        anchor, positive = sentence_features
        feature_names = list(anchor.keys())

        # Encode all positives once, without gradients; they serve as the pool of negative candidates
        with torch.no_grad():
            self.model.eval()
            all_positive_emb = self.model(positive)["sentence_embedding"].detach()
            self.model.train()

        diagonal_matrix = torch.eye(len(all_positive_emb), len(all_positive_emb), device=all_positive_emb.device)

        # Process the large batch as mini-batches of (anchor, positive, hardest_negative) triplets
        for start_idx in range(0, len(all_positive_emb), self.mini_batch_size):
            end_idx = start_idx + self.mini_batch_size
            anchor_emb = self.model({key: anchor[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]

            # For each anchor, pick the hardest negative among all positives in the large batch
            hard_negative_features = {key: [] for key in feature_names}
            with torch.no_grad():
                cos_scores = util.cos_sim(anchor_emb, all_positive_emb)
                # Subtract 2 on the diagonal so an anchor's own positive is never selected as its negative
                negative_scores = cos_scores - 2 * diagonal_matrix[start_idx:end_idx]
                negatives_max, negatives_ids = torch.max(negative_scores, dim=1)

            for hard_negative_id in negatives_ids:
                for key in feature_names:
                    hard_negative_features[key].append(positive[key][hard_negative_id])

            for key in feature_names:
                hard_negative_features[key] = torch.stack(hard_negative_features[key])

            # Re-encode the positives and hard negatives with gradients enabled
            positive_emb = self.model({key: positive[key][start_idx:end_idx] for key in feature_names})[
                "sentence_embedding"
            ]
            negative_emb = self.model(hard_negative_features)["sentence_embedding"]

            assert positive_emb.shape == anchor_emb.shape
            assert negative_emb.shape == anchor_emb.shape

            # Hinge losses on both margins
            pos_cosine = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cosine = F.cosine_similarity(anchor_emb, negative_emb)
            losses = F.relu(self.positive_margin - pos_cosine) + F.relu(neg_cosine - self.negative_margin)
            losses = losses.mean()

            # Backpropagate every mini-batch except the last; the returned loss is backpropagated by the train loop
            if end_idx < len(all_positive_emb):
                losses.backward()

        return losses

    def forward_non_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        reps = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
        embeddings_a, embeddings_b = reps

        cos_scores = util.cos_sim(embeddings_a, embeddings_b)
        positive_scores = torch.diagonal(cos_scores)
        # Subtract 2 on the diagonal so the paired positive is never the maximum
        negative_scores = cos_scores - 2 * torch.eye(*cos_scores.shape, device=cos_scores.device)
        negatives_max, _ = torch.max(negative_scores, dim=1)
        losses = F.relu(self.positive_margin - positive_scores) + F.relu(negatives_max - self.negative_margin)
        return losses.mean()

    @property
    def citation(self) -> str:
        return """
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
"""