
    gZ                        d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZ  e            rddlZ ej        e          ZdZ	 	 	 	 d/de
ee         df         dede	e         fdZ e            r)edfdej        j        dede	e         dej        j        fdZ	 	 	 	 d/dee         dede	e         fdZ G d de          Z  G d de          Z! G d de          Z" G d de"          Z# G d de          Z$ G d de          Z% G d  d!e          Z& G d" d#e          Z' G d$ d%e          Z( G d& d'e          Z) G d( d)e          Z*d*dd*d*d	d*d*d*d*d+	Z+e$e"e#e!e%e&e'e(e)e*d,
Z,d-d-d-d-d-d.d-d-d-d-d,
Z-dS )0zGLUE processors and helpers    N)asdict)Enum)ListOptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc                 2   t          j        t                              d          t                     t                      rCt          | t          j        j	                  r$|t          d          t          | |||          S t          | |||||          S )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionNzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr
   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r   s         ]/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr&   )   s    2 M%,,Z88-HHH lZ"'/BB l<vwww4XyU_fjkkkk-)
*bm       returnc                 d   t          |                     fd| D             } t          | |||          |dk    rt          j        nt          j        }fd}|j        }t          j        j                            |d |D             |fd |D             t          j	        g           f          S )zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        c                 `    g | ]*}                                         |                    +S  )tfds_mapget_example_from_tensor_dict).0example	processors     r%   
<listcomp>z9_tf_glue_convert_examples_to_features.<locals>.<listcomp>Z   s6    ppp\cI&&y'M'Mg'V'VWWpppr'   r   sts-bc               3      K   D ]H} d t          |                                           D             }|                    d          }||fV  Id S )Nc                     i | ]
\  }}|||S Nr+   )r.   kvs      r%   
<dictcomp>zF_tf_glue_convert_examples_to_features.<locals>.gen.<locals>.<dictcomp>`   s    JJJdaAMQMMMr'   label)r   itemspop)exdr9   featuress      r%   genz2_tf_glue_convert_examples_to_features.<locals>.gen^   sg       ! !JJfRjj&6&6&8&8JJJg%j    ! !r'   c                 (    i | ]}|t           j        S r+   )r   int32r.   r6   s     r%   r8   z9_tf_glue_convert_examples_to_features.<locals>.<dictcomp>h   s    ///aa///r'   c                 :    i | ]}|t          j        d g          S r5   )r   TensorShaperB   s     r%   r8   z9_tf_glue_convert_examples_to_features.<locals>.<dictcomp>i   s&    ===Aa''===r'   )
glue_processorsr&   r   float32int64model_input_namesr    r!   from_generatorrD   )	r   r   r   r   
label_typer?   input_namesr>   r0   s	          @@r%   r#   r#   N   s     $D)++	ppppgoppp4XyU_fjkkk#'7??RZZ
	! 	! 	! 	! 	!  1w--//;///<=====r~b?Q?QR
 
 	
r'   c                    ||j         }|yt          |                     }|4|                                }t                              d| d|            -t
          |         t                              d d|            d t          |          D             dt          dt          t          t          d f         ffdfd| D             } |d	 | D             |d
d          g }t          t          |                     D ]<fdD             }	t          di |	d|         i}
|                    |
           =t          | d d                   D ]d\  }t                              d           t                              d|j                    t                              d|                     e|S )NzUsing label list z
 for task zUsing output mode c                     i | ]\  }}||	S r+   r+   )r.   ir9   s      r%   r8   z6_glue_convert_examples_to_features.<locals>.<dictcomp>   s    @@@ha@@@r'   r/   r(   c                     | j         d S dk    r| j                  S dk    rt          | j                   S t                    )Nclassification
regression)r9   floatKeyError)r/   	label_mapr   s    r%   label_from_examplez>_glue_convert_examples_to_features.<locals>.label_from_example   sR    = 4***W]++L(('''{###r'   c                 &    g | ]} |          S r+   r+   )r.   r/   rU   s     r%   r1   z6_glue_convert_examples_to_features.<locals>.<listcomp>   s%    BBBg  ))BBBr'   c                 *    g | ]}|j         |j        fS r+   )text_atext_b)r.   r/   s     r%   r1   z6_glue_convert_examples_to_features.<locals>.<listcomp>   s!    BBBg'.'.	)BBBr'   r   T)r   padding
truncationc                 .    i | ]}||                  S r+   r+   )r.   r6   batch_encodingrN   s     r%   r8   z6_glue_convert_examples_to_features.<locals>.<dictcomp>   s%    BBBa!^A&q)BBBr'   r9      z*** Example ***zguid: z
features: r+   )model_max_lengthrE   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrR   rangelenr   appendguid)r   r   r   r   r   r   r0   labelsr>   inputsfeaturer/   r]   rN   rU   rT   s        `      @@@@r%   r$   r$   m   s/    /
#D)++	"--//JKKHJHH$HHIII+D1KKKJ[JJDJJKKK@@)J*?*?@@@I$L $U3t;K5L $ $ $ $ $ $ $ CBBBBBBFYBBBBB	  N H3x==!! ! !BBBBB>BBB::&::q	:::    !-- 0 0
7%&&&+W\++,,,.!..////Or'   c                       e Zd ZdZdZdS )
OutputModerP   rQ   N)__name__
__module____qualname__rP   rQ   r+   r'   r%   rn   rn      s        %NJJJr'   rn   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S Nr0   super__init__r   r   r   r   r   selfargskwargs	__class__s      r%   rx   zMrpcProcessor.__init__   F    $)&))))00==}MMMMMr'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S See base class.idx	sentence1utf-8	sentence2r9   r   numpydecodestrrz   tensor_dicts     r%   r-   z*MrpcProcessor.get_example_from_tensor_dict       $$&&$**,,33G<<$**,,33G<<G$**,,--	
 
 	
r'   c                    t                               dt          j                            |d                      |                     |                     t          j                            |d                    d          S )r   zLOOKING AT 	train.tsvtrain)ra   rb   ospathjoin_create_examples	_read_tsvrz   data_dirs     r%   get_train_examplesz MrpcProcessor.get_train_examples   s`    G"',,x"E"EGGHHH$$T^^BGLL;4W4W%X%XZabbbr'   c                     |                      |                     t          j                            |d                    d          S r   zdev.tsvdevr   r   r   r   r   r   s     r%   get_dev_exampleszMrpcProcessor.get_dev_examples   5    $$T^^BGLL94U4U%V%VX]^^^r'   c                     |                      |                     t          j                            |d                    d          S r   ztest.tsvtestr   r   s     r%   get_test_exampleszMrpcProcessor.get_test_examples   5    $$T^^BGLL:4V4V%W%WY_```r'   c                 
    ddgS r   01r+   rz   s    r%   r`   zMrpcProcessor.get_labels       Szr'   c           	          g }t          |          D ]Y\  }}|dk    r| d| }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     Z|S )5Creates examples for the training, dev and test sets.r   -r      r   Nri   rX   rY   r9   rd   rh   r   
rz   linesset_typer   rN   lineri   rX   rY   r9   s
             r%   r   zMrpcProcessor._create_examples   s     '' 	` 	`GAtAvv$$$$D!WF!WF$..DDDGEOOLd6&X]^^^____r'   ro   rp   rq   __doc__rx   r-   r   r   r   r`   r   __classcell__r}   s   @r%   rs   rs      s        99N N N N N
 
 
c c c
_ _ _a a a        r'   rs   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zMnliProcessor.__init__   r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   premiser   
hypothesisr9   r   r   s     r%   r-   z*MnliProcessor.get_example_from_tensor_dict   s    $$&&	"((**11'::%++--44W==G$**,,--	
 
 	
r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   r   r   s     r%   r   z MnliProcessor.get_train_examples   5    $$T^^BGLL;4W4W%X%XZabbbr'   c                     |                      |                     t          j                            |d                    d          S )r   zdev_matched.tsvdev_matchedr   r   s     r%   r   zMnliProcessor.get_dev_examples   s6    $$T^^BGLLK\4]4]%^%^`mnnnr'   c                     |                      |                     t          j                            |d                    d          S )r   ztest_matched.tsvtest_matchedr   r   s     r%   r   zMnliProcessor.get_test_examples   s6    $$T^^BGLLK]4^4^%_%_aopppr'   c                 
    g dS )r   )contradiction
entailmentneutralr+   r   s    r%   r`   zMnliProcessor.get_labels   s    9999r'   c           	         g }t          |          D ]n\  }}|dk    r| d|d          }|d         }|d         }|                    d          rdn|d         }	|                    t          ||||	                     o|S )	r   r   r      	   r   Nr   )rd   
startswithrh   r   r   s
             r%   r   zMnliProcessor._create_examples   s     '' 	` 	`GAtAvv**a**D!WF!WF$//77EDDT"XEOOLd6&X]^^^____r'   r   r   s   @r%   r   r      s        ==N N N N N
 
 
c c co o oq q q: : :      r'   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   z MnliMismatchedProcessor.__init__	  r~   r'   c                     |                      |                     t          j                            |d                    d          S )r   zdev_mismatched.tsvdev_mismatchedr   r   s     r%   r   z(MnliMismatchedProcessor.get_dev_examples  s6    $$T^^BGLLK_4`4`%a%acstttr'   c                     |                      |                     t          j                            |d                    d          S )r   ztest_mismatched.tsvtest_mismatchedr   r   s     r%   r   z)MnliMismatchedProcessor.get_test_examples  s6    $$T^^BGLLK`4a4a%b%bduvvvr'   )ro   rp   rq   r   rx   r   r   r   r   s   @r%   r   r     sk        HHN N N N Nu u uw w w w w w wr'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	ColaProcessorz/Processor for the CoLA data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zColaProcessor.__init__  r~   r'   c           	          t          |d                                         |d                                                             d          dt          |d                                                             S r   r   sentencer   Nr9   r   r   s     r%   r-   z*ColaProcessor.get_example_from_tensor_dict  i    $$&&
#))++227;;G$**,,--	
 
 	
r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   z ColaProcessor.get_train_examples&  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zColaProcessor.get_dev_examples*  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zColaProcessor.get_test_examples.  r   r'   c                 
    ddgS r   r+   r   s    r%   r`   zColaProcessor.get_labels2  r   r'   c           	          |dk    }|r
|dd         }|rdnd}g }t          |          D ]F\  }}| d| }||         }	|rdn|d         }
|                    t          ||	d|
                     G|S )r   r   r   Nr   r   r   r   )rz   r   r   	test_mode
text_indexr   rN   r   ri   rX   r9   s              r%   r   zColaProcessor._create_examples6  s    &	 	!""IE#*QQ
 '' 	^ 	^GAt$$$$D*%F%2DD47EOOLd6$V[\\\]]]]r'   r   r   s   @r%   r   r     s        99N N N N N
 
 
c c c_ _ _a a a        r'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zSst2Processor.__init__H  r~   r'   c           	          t          |d                                         |d                                                             d          dt          |d                                                             S r   r   r   s     r%   r-   z*Sst2Processor.get_example_from_tensor_dictL  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   z Sst2Processor.get_train_examplesU  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zSst2Processor.get_dev_examplesY  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zSst2Processor.get_test_examples]  r   r'   c                 
    ddgS r   r+   r   s    r%   r`   zSst2Processor.get_labelsa  r   r'   c           	          g }|dk    rdnd}t          |          D ]Q\  }}|dk    r| d| }||         }|dk    rdn|d         }	|                    t          ||d|	                     R|S )r   r   r   r   r   Nr   r   )
rz   r   r   r   r   rN   r   ri   rX   r9   s
             r%   r   zSst2Processor._create_examplese  s    "f,,QQ!
 '' 	^ 	^GAtAvv$$$$D*%F$..DDDGEOOLd6$V[\\\]]]]r'   r   r   s   @r%   r   r   E  s        ::N N N N N
 
 
c c c_ _ _a a a        r'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	StsbProcessorz0Processor for the STS-B data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zStsbProcessor.__init__v  r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r%   r-   z*StsbProcessor.get_example_from_tensor_dictz  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   z StsbProcessor.get_train_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zStsbProcessor.get_dev_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zStsbProcessor.get_test_examples  r   r'   c                     dgS )r   Nr+   r   s    r%   r`   zStsbProcessor.get_labels  s	    vr'   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S )	r   r   r      r   r   Nr   r   r   r   s
             r%   r   zStsbProcessor._create_examples       '' 	` 	`GAtAvv**a**D!WF!WF$..DDDHEOOLd6&X]^^^____r'   r   r   s   @r%   r   r   s  s        ::N N N N N
 
 
c c c_ _ _a a a        r'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QqpProcessorz.Processor for the QQP data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zQqpProcessor.__init__  r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   	question1r   	question2r9   r   r   s     r%   r-   z)QqpProcessor.get_example_from_tensor_dict  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zQqpProcessor.get_train_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zQqpProcessor.get_dev_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zQqpProcessor.get_test_examples  r   r'   c                 
    ddgS r   r+   r   s    r%   r`   zQqpProcessor.get_labels  r   r'   c           	      (   |dk    }|rdnd}|rdnd}g }t          |          D ]m\  }}|dk    r| d|d          }		 ||         }
||         }|rdn|d	         }n# t          $ r Y Cw xY w|                    t          |	|
||
                     n|S )r   r   r   r      r   r   r   Nr^   r   )rd   
IndexErrorrh   r   )rz   r   r   r   q1_indexq2_indexr   rN   r   ri   rX   rY   r9   s                r%   r   zQqpProcessor._create_examples  s    &	!(11q!(11q '' 
	` 
	`GAtAvv**a**Dhh )6tAw   OOLd6&X]^^^____s   A
A('A(r   r   s   @r%   r   r     s        88N N N N N
 
 
c c c_ _ _a a a        r'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QnliProcessorz/Processor for the QNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zQnliProcessor.__init__  r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   questionr   r   r9   r   r   s     r%   r-   z*QnliProcessor.get_example_from_tensor_dict  s    $$&&
#))++227;;
#))++227;;G$**,,--	
 
 	
r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   z QnliProcessor.get_train_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zQnliProcessor.get_dev_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zQnliProcessor.get_test_examples  r   r'   c                 
    ddgS r   r   not_entailmentr+   r   s    r%   r`   zQnliProcessor.get_labels      .//r'   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S 	r   r   r   r   r  r   Nr   r   r   r   s
             r%   r   zQnliProcessor._create_examples  r   r'   r   r   s   @r%   r  r    s        99N N N N N
 
 
c c c_ _ _a a a0 0 0      r'   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	RteProcessorz.Processor for the RTE data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zRteProcessor.__init__  r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r%   r-   z)RteProcessor.get_example_from_tensor_dict
  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zRteProcessor.get_train_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zRteProcessor.get_dev_examples  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zRteProcessor.get_test_examples  r   r'   c                 
    ddgS r  r+   r   s    r%   r`   zRteProcessor.get_labels  r  r'   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S r  r   r   s
             r%   r   zRteProcessor._create_examples#  r   r'   r   r   s   @r%   r  r    s        88N N N N N
 
 
c c c_ _ _a a a0 0 0      r'   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	WnliProcessorz/Processor for the WNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S ru   rv   ry   s      r%   rx   zWnliProcessor.__init__4  r~   r'   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r%   r-   z*WnliProcessor.get_example_from_tensor_dict8  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   z WnliProcessor.get_train_examplesA  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zWnliProcessor.get_dev_examplesE  r   r'   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r%   r   zWnliProcessor.get_test_examplesI  r   r'   c                 
    ddgS r   r+   r   s    r%   r`   zWnliProcessor.get_labelsM  r   r'   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S r  r   r   s
             r%   r   zWnliProcessor._create_examplesQ  r   r'   r   r   s   @r%   r  r  1  s        99N N N N N
 
 
c c c_ _ _a a a        r'   r  r  )	colamnlimrpcsst-2r2   qqpqnlirtewnli)
r%  r&  zmnli-mmr'  r(  r2   r)  r*  r+  r,  rP   rQ   )NNNN).r   r   r   dataclassesr   enumr   typingr   r   r   tokenization_utilsr	   utilsr
   r   r   r   r   
tensorflowr   
get_loggerro   ra   r   re   r&   r   r    r!   r#   r$   rn   rs   r   r   r   r   r   r   r  r  r  glue_tasks_num_labelsrE   rc   r+   r'   r%   <module>r5     sQ    " ! 				              ( ( ( ( ( ( ( ( ( ( 5 5 5 5 5 5 - - - - - - - - = = = = = = = = = = ? 		H	%	%m  !%	   D&(99: "         F ? 

 $(	
 
'/
&
 SM	

 

 
 
 
D !%	4 4< 4"4 4 4 4 4n       
, , , , ,M , , ,^+ + + + +M + + +\w w w w wm w w w , , , , ,M , , ,^+ + + + +M + + +\+ + + + +M + + +\1 1 1 1 1= 1 1 1h+ + + + +M + + +\+ + + + += + + +\+ + + + +M + + +^ 
 
  &      r'   