
from __future__ import annotations

from collections.abc import Iterable

import torch
from torch import Tensor

from sentence_transformers.SentenceTransformer import SentenceTransformer

from .BatchHardTripletLoss import BatchHardTripletLoss, BatchHardTripletLossDistanceFunction


class BatchHardSoftMarginTripletLoss(BatchHardTripletLoss):
    def __init__(
        self, model: SentenceTransformer, distance_metric=BatchHardTripletLossDistanceFunction.eucledian_distance
    ) -> None:
        """
        BatchHardSoftMarginTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, where the same label indicates sentences from the same class. Your train dataset
        must contain at least 2 examples per label class. This soft-margin variant does not require setting a margin.
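        Instead of a hinge loss with a fixed margin, it minimizes the softplus ``log(1 + exp(d(a, p) - d(a, n)))``
        over the hardest positive and hardest negative found for each anchor in the batch.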

        Args:
            model: SentenceTransformer model
            distance_metric: Function that computes the pairwise distance
                matrix for a batch of embeddings. The class
                ``BatchHardTripletLossDistanceFunction`` contains pre-defined
                metrics that can be used.

        Definitions:
            :Easy triplets: Triplets which have a loss of 0 because
                ``distance(anchor, positive) + margin < distance(anchor, negative)``.
            :Hard triplets: Triplets where the negative is closer to the anchor than the positive, i.e.,
                ``distance(anchor, negative) < distance(anchor, positive)``.
            :Semi-hard triplets: Triplets where the negative is not closer to the anchor than the positive, but which
                still have a positive loss, i.e., ``distance(anchor, positive) < distance(anchor, negative) + margin``.
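
            For example, with the soft margin a triplet with ``d(a, p) = 0.3`` and ``d(a, n) = 0.5`` still
            contributes ``log(1 + exp(-0.2)) ≈ 0.598`` to the loss, so easy triplets are down-weighted but
            never fully ignored.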

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per label class.
            3. Your dataset should contain hard positives and negatives.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.GROUP_BY_LABEL`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that each batch contains 2+ examples per label class.
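              For example: ``SentenceTransformerTrainingArguments(..., batch_sampler=BatchSamplers.GROUP_BY_LABEL)``.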

        Relations:
            * :class:`BatchHardTripletLoss` uses a user-specified margin, while this loss does not require setting a margin.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchHardSoftMarginTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        N)super__init__sentence_embedderdistance_metric)selfr   r   	__class__s      w/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/losses/BatchHardSoftMarginTripletLoss.pyr   z'BatchHardSoftMarginTripletLoss.__init__   s4    T 	!&.    sentence_featuresIterable[dict[str, Tensor]]labelsr   c                p    |                      |d                   d         }|                     ||          S )Nr   sentence_embedding)r   #batch_hard_triplet_soft_margin_loss)r   r   r   reps       r   forwardz&BatchHardSoftMarginTripletLoss.forward\   s7    $$%6q%9::;OP77DDDr   
embeddingsc                   |                      |          }t          j        |                                          }||z  }|                    dd          \  }}t          j        |                                          }|                    dd          \  }	}||	d|z
  z  z   }
|
                    dd          \  }}t          j        t          j	        ||z
                      }|
                                }|S )a6  Build the triplet loss over a batch of embeddings.
        For each anchor, we get the hardest positive and hardest negative to form a triplet.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
            squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                     If false, output is the pairwise euclidean distance matrix.
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
        r   T)keepdimg      ?)r   r    get_anchor_positive_triplet_maskfloatmax get_anchor_negative_triplet_maskmintorchlog1pexpmean)r   r   r    pairwise_distmask_anchor_positiveanchor_positive_disthardest_positive_dist_mask_anchor_negativemax_anchor_negative_distanchor_negative_disthardest_negative_disttltriplet_losss                 r   r   zBBatchHardSoftMarginTripletLoss.batch_hard_triplet_soft_margin_lossb   s    ,,Z88  4TU[\\bbdd  4mC $8#;#;At#;#L#L q  4TU[\\bbdd '4&7&74&7&H&H# !,/G3QeKe/ff $8#;#;At#;#L#L q
 [#8;P#PQQRRwwyyr   strc                    dS )Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
"""