from __future__ import annotations

from collections.abc import Iterable

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util


class MegaBatchMarginLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        positive_margin: float = 0.8,
        negative_margin: float = 0.3,
        use_mini_batched_version: bool = True,
        mini_batch_size: int = 50,
    ) -> None:
        """
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.
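
        For intuition, picking the hardest in-batch negative reduces to a row-wise argmax over the
        anchor-positive cosine similarity matrix, with each anchor's own positive masked out. A
        minimal sketch with toy embeddings and hard-coded margins (the tensors below are made-up
        stand-ins, not this class's internals)::

            import torch
            import torch.nn.functional as F
            from sentence_transformers import util

            anchor_emb = torch.randn(4, 8)    # 4 anchors with embedding dimension 8
            positive_emb = torch.randn(4, 8)  # their matching positives
            scores = util.cos_sim(anchor_emb, positive_emb)  # (4, 4) pairwise cosine similarities
            scores = scores - 2 * torch.eye(4)               # push cos(anchor_i, positive_i) below -1
            hardest_ids = scores.argmax(dim=1)               # j != i maximizing cos_sim(anchor_i, positive_j)

            pos_cos = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cos = F.cosine_similarity(anchor_emb, positive_emb[hardest_ids])
            loss = (F.relu(0.8 - pos_cos) + F.relu(neg_cos - 0.3)).mean()  # margins 0.8 / 0.3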

        Args:
            model: SentenceTransformer model
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor for the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
                from datasets import Dataset

                train_batch_size = 250
                train_mini_batch_size = 32

                model = SentenceTransformer('all-MiniLM-L6-v2')
                train_dataset = Dataset.from_dict({
                    "anchor": [f"This is sentence number {i}" for i in range(500)],
                    "positive": [f"This is sentence number {i}" for i in range(1, 501)],
                })
                loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    per_device_train_batch_size=train_batch_size,
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        """
        super().__init__()
        self.model = model
        self.positive_margin = positive_margin
        self.negative_margin = negative_margin
        self.mini_batch_size = mini_batch_size
        self.forward = self.forward_mini_batched if use_mini_batched_version else self.forward_non_mini_batched
JzMegaBatchMarginLoss.__init__sentence_featuresIterable[dict[str, Tensor]]labelsr   c              	     s  |\ t   }t  | j  | d  }| j  W d    n1 s,w   Y  tjt	|t	||j
d}tdt	|| jD ]| j |  fdd|D d }dd |D }t ! t||}|d|   }	tj|	dd	\}
}W d    n1 sw   Y  |D ]}|D ]}|| | |  qq|D ]}t|| ||< q| fd
d|D d }| |d }|j|jksJ |j|jksJ t||}t||}t| j| t|| j  }| }t	|k r|  qG|S )Nsentence_embedding)devicer   c                   s   i | ]}| |  qS r   r   .0key)anchorend_idx	start_idxr   r   
<dictcomp>l       z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>c                 S  s   i | ]}|g qS r   r   r%   r   r   r   r+   r   s          dimc                   s   i | ]}||   qS r   r   r%   )r)   positiver*   r   r   r+      r,   )listkeystorchno_gradr   evaldetachtraineyelenr$   ranger   r   pytorch_cos_simmaxappendstackshapeFcosine_similarityrelur   r   meanbackward)r   r    r"   feature_namesall_positive_embdiagonal_matrix
anchor_embhard_negative_features
cos_scoresnegative_scoresnegatives_maxnegatives_idshard_negative_idr'   positive_embnegative_emb
pos_cosine
neg_cosinelossesr   )r(   r)   r1   r*   r   r   ^   sP   



 z(MegaBatchMarginLoss.forward_mini_batchedc                   s    fdd|D }|\}}t ||}t|}|dtj|jd|ji  }tj|dd\}	}
t	 j
| t	|	 j  }| S )Nc                   s   g | ]	}  |d  qS )r#   )r   )r&   sentence_featurer   r   r   
<listcomp>   s    z@MegaBatchMarginLoss.forward_non_mini_batched.<locals>.<listcomp>r-   r$   r.   r/   )r   r<   r4   diagonalr9   r@   r$   r=   rA   rC   r   r   rD   )r   r    r"   repsembeddings_aembeddings_brK   positive_scoresrL   rM   _rT   r   rV   r   r      s   
    @property
    def citation(self) -> str:
        return """
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
"""