§
    ÑìNg|  ã                  óp   — d dl mZ d dlmZ d dlmZmZ d dlmZ ddl	m	Z	m
Z
  G d„ dej        ¦  «        Zd	S )
é    )Úannotations)ÚIterable)ÚTensorÚnn)ÚSentenceTransformeré   )ÚBatchHardTripletLossÚ$BatchHardTripletLossDistanceFunctionc                  óX   ‡ — e Zd Zej        dfdˆ fd„Zdd„Zdd„Zedd„¦   «         Z	ˆ xZ
S )ÚBatchAllTripletLossé   Úmodelr   ÚmarginÚfloatÚreturnÚNonec                ór   •— t          ¦   «                              ¦   «          || _        || _        || _        dS )a¦  
        BatchAllTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class.

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class SiameseDistanceMetric contains
                pre-defined metrics that can be used.
            margin: Negative samples should be at least margin further
                apart from the anchor than the positive.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per labels class.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.GROUP_BY_LABEL`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that each batch contains 2+ examples per label class.

        Relations:
            * :class:`BatchHardTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
            * :class:`BatchHardSoftMarginTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
              Also, it does not require setting a margin.
            * :class:`BatchSemiHardTripletLoss` uses only semi-hard triplets, valid triplets, rather than all possible, valid triplets.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchAllTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        N)ÚsuperÚ__init__Úsentence_embedderÚtriplet_marginÚdistance_metric)Úselfr   r   r   Ú	__class__s       €úl/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/losses/BatchAllTripletLoss.pyr   zBatchAllTripletLoss.__init__   s:   ø€ õT 	‰Œ×ÒÑÔÐØ!&ˆÔØ$ˆÔØ.ˆÔÐÐó    Úsentence_featuresúIterable[dict[str, Tensor]]Úlabelsr   c                óp   — |                       |d         ¦  «        d         }|                      ||¦  «        S )Nr   Úsentence_embedding)r   Úbatch_all_triplet_loss)r   r   r   Úreps       r   ÚforwardzBatchAllTripletLoss.forward\   s7   € Ø×$Ò$Ð%6°qÔ%9Ñ:Ô:Ð;OÔPˆØ×*Ò*¨6°3Ñ7Ô7Ð7r   Ú
embeddingsc                ó|  — |                       |¦  «        }|                     d¦  «        }|                     d¦  «        }||z
  | j        z   }t          j        |¦  «        }|                     ¦   «         |z  }d||dk     <   ||dk             }|                     d¦  «        }	|                     ¦   «         |	dz   z  }|S )a]  Build the triplet loss over a batch of embeddings.
        We generate all the valid triplets and average the loss over the positive ones.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
            margin: margin for triplet loss
            squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                     If false, output is the pairwise euclidean distance matrix.
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
        é   r   r   g¼‰Ø—²Òœ<)r   Ú	unsqueezer   r	   Úget_triplet_maskr   ÚsizeÚsum)
r   r   r%   Úpairwise_distÚanchor_positive_distÚanchor_negative_distÚtriplet_lossÚmaskÚvalid_tripletsÚnum_positive_tripletss
             r   r"   z*BatchAllTripletLoss.batch_all_triplet_loss`   sÒ   € ð ×,Ò,¨ZÑ8Ô8ˆà,×6Ò6°qÑ9Ô9ÐØ,×6Ò6°qÑ9Ô9Ðð ,Ð.BÑBÀTÔEXÑXˆõ $Ô4°VÑ<Ô<ˆØ—z’z‘|”| lÑ2ˆð *+ˆ\ AÒ%Ñ&ð & l°UÒ&:Ô;ˆØ .× 3Ò 3°AÑ 6Ô 6Ðð
 $×'Ò'Ñ)Ô)Ð-BÀUÑ-JÑKˆàÐr   Ústrc                ó   — dS )Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
© )r   s    r   ÚcitationzBatchAllTripletLoss.citation‹   s   € ð	ð 	r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r%   r   r   r   )r   r3   )Ú__name__Ú
__module__Ú__qualname__r
   Úeucledian_distancer   r$   r"   Úpropertyr6   Ú__classcell__)r   s   @r   r   r      sž   ø€ € € € € ð =ÔOØð	M/ð M/ð M/ð M/ð M/ð M/ð M/ð^8ð 8ð 8ð 8ð)ð )ð )ð )ðV ð
ð 
ð 
ñ „Xð
ð 
ð 
ð 
ð 
r   r   N)Ú
__future__r   Úcollections.abcr   Útorchr   r   Ú)sentence_transformers.SentenceTransformerr   r	   r
   ÚModuler   r5   r   r   ú<module>rB      s«   ðØ "Ð "Ð "Ð "Ð "Ð "à $Ð $Ð $Ð $Ð $Ð $à Ð Ð Ð Ð Ð Ð Ð à IÐ IÐ IÐ IÐ IÐ Ià \Ð \Ð \Ð \Ð \Ð \Ð \Ð \ðJð Jð Jð Jð J˜"œ)ñ Jô Jð Jð Jð Jr   