
    Ng                    l    d dl mZ d dlZd dlZd dlZddlmZ  G d d          Z G d de          ZdS )	    )annotationsN   )InputExamplec                  <    e Zd ZdZddddej        dddfdZdd	Zd
S )STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             g/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/readers/STSDataReader.py__init__zSTSDataReader.__init__   sJ     -*$$" 0""    c           
        t           j                            | j        |          }|                    d          rt          j        |dd          nt          |d          5 }t          j        || j	        | j
                  }g }t          |          D ]\  }}t          || j                           }	| j        r|	| j        z
  | j        | j        z
  z  }	|| j                 }
|| j                 }|                    t)          |t+          |          z   |
|g|	                     |dk    rt-          |          |k    r nd	d	d	           n# 1 swxY w Y   |S )
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_exampleszSTSDataReader.get_examples&   s   7<< 3X>>   ''2DIhv6666h111	 69:cT^T\RRRDH$T?? 
 
Cc$"4566( Y"T^38WXE))(SWW2DRQSH\a b b bccc!##H(E(EE#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	& s   #C EEEN)r   )__name__
__module____qualname____doc__r&   
QUOTE_NONEr   r8    r   r   r   r   
   sb          # # # #,     r   r   c                  >     e Zd ZdZddddej        dddf fd	Z xZS )	STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r
         r	   Tr   c
                \    t                                          |||||||||		  	         d S )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   zSTSBenchmarkDataReader.__init__D   sK     	)!!'- 	 
	
 
	
 
	
 
	
 
	
r   )r9   r:   r;   r<   r&   r=   r   __classcell__)rE   s   @r   r@   r@   ?   sc          
 
 
 
 
 
 
 
 
 
r   r@   )	
__future__r   r&   r$   r     r   r   r@   r>   r   r   <module>rI      s    " " " " " " 



  				      2 2 2 2 2 2 2 2j
 
 
 
 
] 
 
 
 
 
r   