
    g[N                        d dl Z d dlZd dlZddlmZmZmZmZ ddl	m
Z
mZmZmZmZ  e            rd dlZddlmZmZ  e            rd dlZddlmZmZ  G d d	e
          Z e ed
                     G d de                      ZdS )    N   )add_end_docstringsis_tf_availableis_torch_availablerequires_backends   )ArgumentHandlerDatasetPipelinePipelineExceptionbuild_pipeline_init_args),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES3TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESc                       e Zd ZdZddZdS )%TableQuestionAnsweringArgumentHandlerzB
    Handles arguments for the TableQuestionAnsweringPipeline
    Nc                    t          | d           dd l}|t          d          |]t          |t                    r/|                    d          |                    d          |g}nt          |t                    rt          |          dk    rt          d |D                       st          dd |D                        |d                             d          |d                             d          |}nt          d	|d         	                                 d
          t          t          |t                    st          |t          j                  r|S t          dt          |           d          ||dg}|D ]R}t          |d         |j                  s5|d         t          d          |                    |d                   |d<   S|S )Npandasr   z(Keyword argument `table` cannot be None.querytablec              3   @   K   | ]}t          |t                    V  d S N)
isinstancedict.0ds     k/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/pipelines/table_question_answering.py	<genexpr>zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>5   s,      >>1:a..>>>>>>    z:Keyword argument `table` should be a list of dict, but is c              3   4   K   | ]}t          |          V  d S r   )typer   s     r   r    zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>7   s-      UmUmbcVZ[\V]V]UmUmUmUmUmUmr!   zIf keyword argument `table` is a list of dictionaries, each dictionary should have a `table` and `query` key, but only dictionary has keys z `table` and `query` keys.zZInvalid input. Keyword argument `table` should be either of type `dict` or `list`, but is ))r   r   zTable cannot be None.)r   r   
ValueErrorr   r   getlistlenallkeysr
   typesGeneratorTyper#   	DataFrame)selfr   r   kwargspdtqa_pipeline_inputstqa_pipeline_inputs          r   __call__z.TableQuestionAnsweringArgumentHandler.__call__%   s2    	$)))=GHHH]%&& 599W+=+=+IeiiX_N`N`Nl',g##E4(( SZZ!^^>>>>>>> $oUmUmglUmUmUmoo   8<<((4qg9N9N9Z*/''$vJOPQ(--//v v v   $E7)C)C$zRWY^YlGmGm$ )u++) ) )  
 .3U#C#C"D"5 	X 	X092<HH X%g.6$%<===.0ll;Mg;V.W.W"7+""r!   )NN)__name__
__module____qualname____doc__r3    r!   r   r   r       s2         -# -# -# -# -# -#r!   r   T)has_tokenizerc                   n     e Zd ZdZdZ e            f fd	Zd Zd Z fdZ	ddZ
dd
ZddZd Z xZS )TableQuestionAnsweringPipelinea  
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
    PyTorch.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    ztable,queryc                 .    t                      j        |i | || _        | j        dk    r.t	          j                    }|                    t                     n-t          j                    }|                    t                     | 
                    |           t          t          | j        j        dd                     o't          t          | j        j        dd                     | _        t!          | j        j        d          rdnd | _        d S )Ntfaggregation_labelsnum_aggregation_labelstapas)super__init___args_parser	frameworkr   copyupdater   r   r   check_model_typeboolgetattrmodelconfig	aggregatehasattrr#   )r.   args_parserargsr/   mapping	__class__s        r   rB   z'TableQuestionAnsweringPipeline.__init__w   s    $)&)))'>T!!INPPGNNJKKKKFKMMGNNGHHHg&&&gdj&79MtTTUU 
Z^DJ%'?FF[
 [
  'tz'8:NOOYGGUY			r!   c                      | j         di |S )Nr8   )rJ   )r.   inputss     r   batch_inferencez.TableQuestionAnsweringPipeline.batch_inference   s    tz##F###r!   c           	         | j         dk    rDg }g }d}|d         j        d         }|d                             | j                  }|d                             | j                  }|d                             | j                  }d}	t	          |          D ]h}
|W|	dddf         }t          j        |                                                                          }||
         }	t	          |j        d                   D ]}|	dddf         	                                |         }|	dddf         	                                |         dz
  }|	ddd	f         	                                |         dz
  }|dk    r&|dk    r |dk    rt          |||f                   ||<   t          j        |                              t          j                                      | j                  |	dddf<   ||
         }||
         }||
         }	|                     |                    d          |                    d          |	                    d          
          }|j        }| j        r|                    |j                   |                    |           t          j                            |          }|j        |                    t          j                                      |j        j                  z  }t3          j        t6                    t9          |                                	                                          D ]\  }}|	dddf         	                                |         }|	dddf         	                                |         dz
  }|	ddd	f         	                                |         dz
  }|dk    r)|dk    r#|dk    r||f                             |           fdD             }jt          j        t?          |          d          }| j        s|fn#|t          j        t?          |          d          fS g }g }d}|d         j        d         }|d         }|d         }|d                                         }d}	t	          |          D ]}
||	dddf         }t          j        |t
          j                   }||
         }	t	          |j        d                   D ]}|	dddf         	                                |         }|	dddf         	                                |         dz
  }|	ddd	f         	                                |         dz
  }|dk    r&|dk    r |dk    rt          |||f                   ||<   ||	dddf<   ||
         }||
         }||
         }	|                     t          j!        |d          t          j!        |d          t          j!        |	d          
          }|j        }| j        r|                    |j                   |                    |           tD          j#        $                    tE          j%        |tD          j                            tE          j%        |tD          j                  z  }t3          j        t6                    |	}	t9          tE          j        |                                          	                                          D ]\  }}|	dddf         	                                |         }|	dddf         	                                |         dz
  }|	ddd	f         	                                |         dz
  }|dk    r)|dk    r#|dk    r||f                             |           fdD             }tE          j&        t?          |          d          }| j        s|fn#|tE          j&        t?          |          d          fS )z
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        ptN	input_idsr   attention_masktoken_type_ids   r   r   )rW   rX   rY   )logitsc                 r    i | ]3}|t          j        |                                                   d k    4S g      ?nparraymeanr   keycoords_to_probss     r   
<dictcomp>zGTableQuestionAnsweringPipeline.sequential_inference.<locals>.<dictcomp>   <    lllUXRXoc.B%C%C%H%H%J%JS%Plllr!   )dtype)axisc                 r    i | ]3}|t          j        |                                                   d k    4S r]   r^   rb   s     r   re   zGTableQuestionAnsweringPipeline.sequential_inference.<locals>.<dictcomp>  rf   r!   )'rD   shapetodeviceranger_   
zeros_likecpunumpytolistinttorch
from_numpyr#   longrJ   	unsqueezer[   rL   appendlogits_aggregationdistributions	Bernoulliprobsfloat32collectionsdefaultdictr'   	enumeratesqueezecattupleint32expand_dimsr=   mathsigmoidcastconcat)r.   rS   
all_logitsall_aggregationsprev_answers
batch_sizerW   rX   rY   token_type_ids_exampleindexprev_labels_examplemodel_labelsi
segment_idcol_idrow_idinput_ids_exampleattention_mask_exampleoutputsr[   dist_per_tokenprobabilitiespcolrowlogits_batchrd   s                              @r   sequential_inferencez3TableQuestionAnsweringPipeline.sequential_inference   s%   
 >T!!J!L,215J{+..t{;;I#$4588EEN#$4588EEN%)"z** .m .m  +*@A*F'#%=1D1H1H1J1J1P1P1R1R#S#SL-;E-B*"<#5a#899 R R%;AAAqD%A%H%H%J%J1%M
!71!=!D!D!F!Fq!IA!M!71!=!D!D!F!Fq!IA!M!Q;;6Q;;:??.1,?O2P.Q.QLO383CL3Q3Q3V3VW\Wa3b3b3e3efjfq3r3r*111a40$-e$4!)7)>&)7)>&**/99!<<#9#C#CA#F#F#9#C#CA#F#F %  
 !> H$++G,FGGG!!&)))!&!4!>!>f!>!M!M . 47M7R7RSXS`7a7a7d7d"(/8 8 ! #."9$"?"?%m&;&;&=&=&D&D&F&FGG > >DAq!71!=!D!D!F!Fq!IJ0A6==??BQFC0A6==??BQFCaxxC1HHq'c
3::1===llll\klll 9U:%6%6::L*..sL??|UYW\]mWnWnpqMrMr>ssJ!L,215J{+I#$45N#$45;;==N%)"z** .m .m  +*@A*F'#%=1DBH#U#U#UL-;E-B*"<#5a#899 R R%;AAAqD%A%H%H%J%J1%M
!71!=!D!D!F!Fq!IA!M!71!=!D!D!F!Fq!IA!M!Q;;6Q;;:??.1,?O2P.Q.QLO3?*111a40$-e$4!)7)>&)7)>&** n->QGGG#%>2Hq#Q#Q#Q#%>2Hq#Q#Q#Q %  
 !> H$++G,FGGG!!&))) "
0K0K L Lrw*BJP P ! #."9$"?"?)?&%bj&?&?&E&E&G&G&N&N&P&PQQ > >DAq!71!=!D!D!F!Fq!IJ0A6==??BQFC0A6==??BQFCaxxC1HHq'c
3::1===llll\klll9U:%6%6::L*..sL??|RYW\]mWnWnpqMrMr>ssr!   c                      | j         |i |} t                      j        |fi |}t          |          dk    r|d         S |S )a  
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `List[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`List[Tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`List[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        r   r   )rC   rA   r3   r(   )r.   rO   r/   pipeline_inputsresultsrQ   s        r   r3   z'TableQuestionAnsweringPipeline.__call__  s[    V ,$+T<V<<"%''"?==f==w<<11:r!   Nc                 >    i }|||d<   |||d<   i }|||d<   ||i fS )Npadding
truncation
sequentialr8   )r.   r   r   r   r/   preprocess_paramsforward_paramss          r   _sanitize_parametersz3TableQuestionAnsweringPipeline._sanitize_parameters_  sL    +2i(!.8l+!+5N<( ."44r!   Tc                     || j         dk    rd}nd}|d         |d         }}|j        rt          d          ||dk    rt          d          |                     ||| j        ||	          }||d<   |S )
Nr@   drop_rows_to_fitdo_not_truncater   r   ztable is empty zquery is empty)return_tensorsr   r   )r#   emptyr%   	tokenizerrD   )r.   pipeline_inputr   r   r   r   r   rS   s           r   
preprocessz)TableQuestionAnsweringPipeline.preprocessk  s    yG##/

.
%g.w0Gu; 	/-...=ERKK-...uT^Xblsttwr!   Fc                     |                     d          }| j        dk    r|r | j        di |}n0 | j        di |}n"d|vr
| j        |d<    | j        j        di ||}|||d}|S )Nr   r@   generation_config)model_inputsr   r   r8   )popr#   r   rT   r   rJ   generate)r.   r   r   generate_kwargsr   r   model_outputss          r   _forwardz'TableQuestionAnsweringPipeline._forward{  s      ))9 ?3$3CClCC.$.>>>> #/997;7M 34)dj)LLLLOLLG)5RYZZr!   c                     |d         }|d         |d         } j         dk    re j        rw|d d         \  }} j                            |||          }|\  }} fdt	          |          D              j        j        j        fdt	          |          D             }	n/|d         } j                            ||          }|d         }i i }	g }
t	          |          D ]\  }}fd	|D             }                    |d
          }|	                    |d
          }|d	                    |          z   |fd|D             d}|r||d<   |

                    |           t          |          dk    rt          d          n&d  j                            |d          D             }
t          |
          dk    r|
n|
d         S )Nr   r   r   r@   r   c                 F    i | ]\  }}|j         j        j        |         S r8   )rJ   rK   r>   )r   r   predr.   s      r   re   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<dictcomp>  s/    wwwQXQRTXq$*"3"Ft"Lwwwr!   c                 :    i | ]\  }}|k    ||         d z   S )z > r8   )r   r   r   aggregatorsno_agg_label_indexs      r   re   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<dictcomp>  s;     & & &29!T[_cu[u[uA{1~-[u[u[ur!   r   c                 *    g | ]}j         |         S r8   iatr   
coordinater   s     r   
<listcomp>z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>  s     MMM::.MMMr!   r   z, c                 *    g | ]}j         |         S r8   r   r   s     r   r   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>  s     RRR
ei
3RRRr!   )answercoordinatescells
aggregatorzEmpty answerc                     g | ]}d |iS )r   r8   )r   r   s     r   r   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>  s    wwwf&)wwwr!   T)skip_special_tokensr   )r#   rL   r   convert_logits_to_predictionsr   rJ   rK   no_aggregation_label_indexr&   joinrw   r(   r   batch_decode)r.   r   rS   r   r[   
logits_aggpredictionsanswer_coordinates_batchagg_predictionsaggregators_prefixanswersr   r   r   r   aggregator_prefixr   r   r   r   s   `                @@@r   postprocessz*TableQuestionAnsweringPipeline.postprocess  sK   ~.g&	*9~ (%,RaR["
"nJJ6SY[eff<G9(/wwww\efu\v\vwww%)Z%6%Q"& & & & &=F=W=W& & &"" !"nJJ6SYZZ+6q>( %'"G&/0H&I&I ' '"{MMMMMMM(__UB77
$6$:$:5"$E$E!/$))E2B2BB#.RRRRkRRR 
  6+5F<(v&&&&6{{a'777   xw8S8ST[qu8S8v8vwwwGg,,**ww
:r!   )NNN)NTN)F)r4   r5   r6   r7   default_input_namesr   rB   rT   r   r3   r   r   r   r   __classcell__)rQ   s   @r   r;   r;   U   s         < (#H#H#J#J Z Z Z Z Z Z"$ $ $@t @t @tDP P P P Pd
5 
5 
5 
5       "(; (; (; (; (; (; (;r!   r;   )r}   r+   rp   r_   utilsr   r   r   r   baser	   r
   r   r   r   rs   models.auto.modeling_autor   r   
tensorflowr=   models.auto.modeling_tf_autor   r   r   r;   r8   r!   r   <module>r      s                       b a a a a a a a a a a a a a  LLL       
 ?        2# 2# 2# 2# 2#O 2# 2# 2#j ,,4@@@AA^; ^; ^; ^; ^;X ^; ^; BA^; ^; ^;r!   