
"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from typing import Any, List, Optional, Sequence, Tuple

from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY


def _get_score(text: str) -> Optional[Tuple[str, int]]:
    """Extract a (verdict, score) pair from the grader's raw output."""
    match = re.search(r"grade:\s*(correct|incorrect)", text.strip(), re.IGNORECASE)
    if match:
        if match.group(1).upper() == "CORRECT":
            return "CORRECT", 1
        elif match.group(1).upper() == "INCORRECT":
            return "INCORRECT", 0
    try:
        # Fall back to checking the first and last words, ignoring punctuation.
        first_word = (
            text.strip()
            .split()[0]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if first_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif first_word.upper() == "INCORRECT":
            return "INCORRECT", 0
        last_word = (
            text.strip()
            .split()[-1]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if last_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif last_word.upper() == "INCORRECT":
            return "INCORRECT", 0
    except IndexError:
        pass
    return None

GGdictc                n    |                                  }t          |          }|d\  }}n|\  }}|||dS )zParse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        Any: The parsed output.
    N)NN)	reasoningvaluescore)r    r.   )r   r1   parsed_scoresr2   r3   s        r-   _parse_string_eval_outputr5   1   sP     

Iy))M!uu$u      c                      e Zd ZU dZdZded<    ed          Zed.d	            Z	e
d/d
            Ze
d.d            Ze
d.d            Ze	 d0d1d            Z	 	 	 d2ddd3d"Zd4d$Zdddd%d&d5d,Zdddd%d&d5d-ZdS )6QAEvalChainz,LLM Chain for evaluating question answering.resultsr   
output_keyignoreextrar   boolc                    dS NF clss    r-   is_lc_serializablezQAEvalChain.is_lc_serializableP       ur6   c                    dS )NcorrectnessrA   selfs    r-   evaluation_namezQAEvalChain.evaluation_nameT   s    }r6   c                    dS NTrA   rH   s    r-   requires_referencezQAEvalChain.requires_referenceX       tr6   c                    dS rL   rA   rH   s    r-   requires_inputzQAEvalChain.requires_input\   rN   r6   Nllmr
   promptOptional[PromptTemplate]kwargsr   c                    |pt           }h d}|t          |j                  k    rt          d| d|j                    | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'input', 'answer' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        >   queryanswerresultInput variables should be 
, but got rQ   rR   rA   )r   setinput_variables
ValueError)rC   rQ   rR   rT   expected_input_varss        r-   from_llmzQAEvalChain.from_llm`   s    , !6;;;#f&<"="===4-@ 4 4!14 4   s4s644V444r6   rV   rW   rX   	callbacksexamplesSequence[dict]predictionsquestion_key
answer_keyprediction_keyrb   r	   
List[dict]c               t    fdt          |          D             }|                     ||          S )5Evaluate question answering examples and predictions.c                R    g | ]#\  }}|         |         |                  d $S )rV   rW   rX   rA   ).0iexamplerg   rh   re   rf   s      r-   
<listcomp>z(QAEvalChain.evaluate.<locals>.<listcomp>   sR     
 
 
 7	 !.!*-%a.8 
 
 
r6   ra   	enumerateapply)rI   rc   re   rf   rg   rh   rb   inputss     ````  r-   evaluatezQAEvalChain.evaluate   _    
 
 
 
 
 
 
 (11
 
 
 zz&Iz666r6   r/   c                v    t          || j                           }t          |v r|t                   |t          <   |S Nr5   r:   r   rI   rX   parsed_results      r-   _prepare_outputzQAEvalChain._prepare_output   5    1&2IJJf%+G_M'"r6   F	referenceinputrb   include_run_info
predictionr   Optional[str]r   r   c               P     | |||d||          }|                      |          S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rm   rb   r   r}   rI   r   r   r   rb   r   rT   rX   s           r-   _evaluate_stringszQAEvalChain._evaluate_strings   sK    0 #$ 
  -
 
 
 ##F+++r6   c               t   K   |                      |||d||           d {V }|                     |          S )Nrm   ru   rb   r   acallr}   r   s           r-   _aevaluate_stringszQAEvalChain._aevaluate_strings   sg       zz"i:NN- " 
 
 
 
 
 
 
 

 ##F+++r6   r   r>   r   r   ry   )rQ   r
   rR   rS   rT   r   r   r8   rm   )rc   rd   re   rd   rf   r   rg   r   rh   r   rb   r	   r   ri   rX   r/   r   r/   r   r   r   r   r   r   rb   r	   r   r>   rT   r   r   r/   )__name__
__module____qualname____doc__r:   __annotations__r   model_configclassmethodrD   propertyrJ   rM   rP   r`   rv   r}   r   r   rA   r6   r-   r8   r8   G   s        66J:  L    [    X    X    X  ,05 5 5 5 [5D $"&7  $7 7 7 7 7 7,    $(##!&!, !, !, !, !, !,N $(##!&, , , , , , , ,r6   r8   c                     e Zd ZdZed.d            Zed.d            Zed.d            Z e	d          Z
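# Usage sketch (comments only, so importing this module stays side-effect free).
# A QAEvalChain is typically obtained via `QAEvalChain.from_llm(llm)` or
# `load_evaluator("qa", llm=llm)` from langchain.evaluation, then used either
# in batch over parallel example/prediction dicts:
#
#     graded = chain.evaluate(
#         examples=[{"query": "What is 2 + 2?", "answer": "4"}],
#         predictions=[{"result": "2 + 2 equals 4."}],
#     )
#
# or on a single triple through the StringEvaluator interface:
#
#     chain.evaluate_strings(
#         input="What is 2 + 2?", reference="4", prediction="2 + 2 equals 4."
#     )
#
# The string API funnels into _evaluate_strings above and returns a dict with
# "reasoning", "value" ("CORRECT"/"INCORRECT") and "score" (1/0) keys.
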
ed/d            Zed0d            Ze	 d1d2d            Z	 	 	 d3ddd4d"Zd5d$Zdddd%d&d6d,Zdddd%d&d6d-ZdS )7ContextQAEvalChainz3LLM Chain for evaluating QA w/o GT based on contextr   r>   c                    dS r@   rA   rB   s    r-   rD   z%ContextQAEvalChain.is_lc_serializable   rE   r6   c                    dS )z.Whether the chain requires a reference string.TrA   rH   s    r-   rM   z%ContextQAEvalChain.requires_reference   	     tr6   c                    dS )z+Whether the chain requires an input string.TrA   rH   s    r-   rP   z!ContextQAEvalChain.requires_input   r   r6   r;   r<   rR   r   Nonec                r    h d}|t          |j                  k    rt          d| d|j                   d S )N>   rV   rX   contextrY   rZ   )r\   r]   r^   )rC   rR   r_   s      r-   _validate_input_varsz'ContextQAEvalChain._validate_input_vars   s`    <<<#f&<"="===4-@ 4 4!14 4   >=r6   r   c                    dS )NzContextual AccuracyrA   rH   s    r-   rJ   z"ContextQAEvalChain.evaluation_name   s    $$r6   NrQ   r
   rS   rT   r   c                T    |pt           }|                     |            | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        r[   rA   )r   r   rC   rQ   rR   rT   s       r-   r`   zContextQAEvalChain.from_llm   s?    , )>  (((s4s644V444r6   rV   r   rX   ra   rc   ri   re   rf   context_keyrh   rb   r	   c               t    fdt          |          D             }|                     ||          S )rk   c                R    g | ]#\  }}|         |         |                  d $S )rV   r   rX   rA   )rn   ro   rp   r   rh   re   rf   s      r-   rq   z/ContextQAEvalChain.evaluate.<locals>.<listcomp>  sR     
 
 
 7	 !.";/%a.8 
 
 
r6   ra   rr   )rI   rc   re   rf   r   rh   rb   ru   s     ````  r-   rv   zContextQAEvalChain.evaluate  rw   r6   r/   c                v    t          || j                           }t          |v r|t                   |t          <   |S ry   rz   r{   s      r-   r}   z"ContextQAEvalChain._prepare_output"  r~   r6   Fr   r   r   r   r   r   c               P     | |||d||          }|                      |          S )Nr   r   r   r   s           r-   r   z$ContextQAEvalChain._evaluate_strings(  sK     $$ 
  -
 
 
 ##F+++r6   c               t   K   |                      |||d||           d {V }|                     |          S )Nr   r   r   r   s           r-   r   z%ContextQAEvalChain._aevaluate_strings=  sg       zz"yJOO- " 
 
 
 
 
 
 
 

 ##F+++r6   r   )rR   r   r   r   r   ry   )rQ   r
   rR   rS   rT   r   r   r   r   )rc   ri   re   ri   rf   r   r   r   rh   r   rb   r	   r   ri   r   r   )r   r   r   r   r   rD   r   rM   rP   r   r   r   rJ   r`   rv   r}   r   r   rA   r6   r-   r   r      s       ==   [    X    X :  L    [ % % % X%  ,05 5 5 5 [5: $$&7  $7 7 7 7 7 7,    $(##!&, , , , , ,2 $(##!&, , , , , , , ,r6   r   c                  ^    e Zd ZdZedd            Zedd            Ze	 ddd            ZdS )CotQAEvalChainz=LLM Chain for evaluating QA using chain of thought reasoning.r   r>   c                    dS r@   rA   rB   s    r-   rD   z!CotQAEvalChain.is_lc_serializableR  rE   r6   r   c                    dS )NzCOT Contextual AccuracyrA   rH   s    r-   rJ   zCotQAEvalChain.evaluation_nameV  s    ((r6   NrQ   r
class CotQAEvalChain(ContextQAEvalChain):
    """LLM Chain for evaluating QA using chain of thought reasoning."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "COT Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> CotQAEvalChain:
        """Load QA Eval Chain from LLM."""
        prompt = prompt or COT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)

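if __name__ == "__main__":
    # Minimal smoke test, illustrative only (not part of the upstream module).
    # It assumes FakeListLLM is importable from langchain_core.language_models;
    # in real use, pass an actual chat model or LLM instead.
    from langchain_core.language_models import FakeListLLM

    fake_llm = FakeListLLM(responses=["GRADE: CORRECT"])
    chain = QAEvalChain.from_llm(llm=fake_llm)
    graded = chain.evaluate(
        examples=[{"query": "What is 2 + 2?", "answer": "4"}],
        predictions=[{"result": "2 + 2 equals 4."}],
    )
    print(graded)  # expected: [{'results': 'GRADE: CORRECT'}]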