§
    ‡ìNgG  ã                  ó`  — d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ej        e¦  «        Z G d„ dee¦  «        Z G d„ de¦  «        Z G d„ d¦  «        Z G d„ dee¦  «        Z  G d„ dee¦  «        Z! G d„ dee¦  «        Z"dS )z3Interfaces to be implemented by general evaluators.é    )ÚannotationsN)ÚABCÚabstractmethod)ÚEnum)ÚAnyÚOptionalÚSequenceÚTupleÚUnion)Úwarn)ÚAgentAction)ÚBaseLanguageModel)Úrun_in_executor)ÚChainc                  óˆ   — e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZdS )ÚEvaluatorTypezThe types of the evaluators.ÚqaÚcot_qaÚ
context_qaÚpairwise_stringÚscore_stringÚlabeled_pairwise_stringÚlabeled_score_stringÚ
trajectoryÚcriteriaÚlabeled_criteriaÚstring_distanceÚexact_matchÚregex_matchÚpairwise_string_distanceÚembedding_distanceÚpairwise_embedding_distanceÚjson_validityÚjson_equalityÚjson_edit_distanceÚjson_schema_validationN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚQAÚCOT_QAÚ
CONTEXT_QAÚPAIRWISE_STRINGÚSCORE_STRINGÚLABELED_PAIRWISE_STRINGÚLABELED_SCORE_STRINGÚAGENT_TRAJECTORYÚCRITERIAÚLABELED_CRITERIAÚSTRING_DISTANCEÚEXACT_MATCHÚREGEX_MATCHÚPAIRWISE_STRING_DISTANCEÚEMBEDDING_DISTANCEÚPAIRWISE_EMBEDDING_DISTANCEÚJSON_VALIDITYÚJSON_EQUALITYÚJSON_EDIT_DISTANCEÚJSON_SCHEMA_VALIDATION© ó    úW/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain/evaluation/schema.pyr   r      sÐ   € € € € € Ø&Ð&à	€Bðà€Fð%ð €JØSØ'€Oðà!€Lðà7ÐðHà1Ðð@à#ÐØVØ€Hð<à)Ðð7à'€OØPØ€KØIØ€KØNØ9ÐØ=Ø-ÐØMØ"?ÐØ;Ø#€MØ.Ø#€MØ=Ø-ÐØTØ5ÐØIÐIr@   r   c                  ó:   — e Zd ZdZeed	d„¦   «         ¦   «         ZdS )
ÚLLMEvalChainz,A base class for evaluators that use an LLM.Úllmr   Úkwargsr   Úreturnc                ó   — dS )z#Create a new evaluator from an LLM.Nr?   )ÚclsrD   rE   s      rA   Úfrom_llmzLLMEvalChain.from_llmM   ó   € € € r@   N)rD   r   rE   r   rF   rC   )r'   r(   r)   r*   Úclassmethodr   rI   r?   r@   rA   rC   rC   J   sB   € € € € € Ø6Ð6àØð2ð 2ð 2ñ „^ñ „[ð2ð 2ð 2r@   rC   c                  ó€   — e Zd ZdZedd„¦   «         Zedd„¦   «         Zedd„¦   «         Zedd„¦   «         Z	 	 ddd„Z	d	S )Ú_EvalArgsMixinz(Mixin for checking evaluation arguments.rF   Úboolc                ó   — dS ©z2Whether this evaluator requires a reference label.Fr?   ©Úselfs    rA   Úrequires_referencez!_EvalArgsMixin.requires_referenceV   ó	   € ð ˆur@   c                ó   — dS )ú0Whether this evaluator requires an input string.Fr?   rQ   s    rA   Úrequires_inputz_EvalArgsMixin.requires_input[   rT   r@   Ústrc                ó"   — d| j         j        › dS )z&Warning to show when input is ignored.zIgnoring input in ú, as it is not expected.©Ú	__class__r'   rQ   s    rA   Ú_skip_input_warningz"_EvalArgsMixin._skip_input_warning`   s   € ð V D¤NÔ$;ÐUÐUÐUÐUr@   c                ó"   — d| j         j        › dS )z*Warning to show when reference is ignored.zIgnoring reference in rZ   r[   rQ   s    rA   Ú_skip_reference_warningz&_EvalArgsMixin._skip_reference_warninge   s   € ð W T¤^Ô%<ÐVÐVÐVð	
r@   NÚ	referenceúOptional[str]ÚinputÚNonec                ó  — | j         r|€t          | j        j        › d¦  «        ‚|| j         st	          | j        ¦  «         | j        r|€t          | j        j        › d¦  «        ‚|| j        st	          | j        ¦  «         dS dS dS )a†  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)rW   Ú
ValueErrorr\   r'   r   r]   rS   r_   )rR   r`   rb   s      rA   Ú_check_evaluation_argsz%_EvalArgsMixin._check_evaluation_argsl   s­   € ð Ôð 	+ 5 =Ý ¤Ô 7ÐSÐSÐSÑTÔTÐTØÐ tÔ':ÐÝÔ)Ñ*Ô*Ð*ØÔ"ð 	/ yÐ'8Ý ¤Ô 7ÐVÐVÐVÑWÔWÐWØÐ"¨4Ô+BÐ"ÝÔ-Ñ.Ô.Ð.Ð.Ð.ð #Ð"Ð"Ð"r@   ©rF   rN   ©rF   rX   )NN)r`   ra   rb   ra   rF   rc   )
r'   r(   r)   r*   ÚpropertyrS   rW   r]   r_   rf   r?   r@   rA   rM   rM   S   s½   € € € € € Ø2Ð2àðð ð ñ „Xðð ðð ð ñ „Xðð ðVð Vð Vñ „XðVð ð
ð 
ð 
ñ „Xð
ð $(Ø#ð/ð /ð /ð /ð /ð /ð /r@   rM   c                  ó’   — e Zd ZdZedd„¦   «         Zedd„¦   «         Zedddœdd„¦   «         Zdddœdd„Z	dddœdd„Z
dddœdd„ZdS )ÚStringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.rF   rX   c                ó   — | j         j        S )zThe name of the evaluation.r[   rQ   s    rA   Úevaluation_namezStringEvaluator.evaluation_nameˆ   s   € ð Œ~Ô&Ð&r@   rN   c                ó   — dS rP   r?   rQ   s    rA   rS   z"StringEvaluator.requires_reference   rT   r@   N©r`   rb   Ú
predictionúUnion[str, Any]r`   úOptional[Union[str, Any]]rb   rE   r   Údictc               ó   — dS )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr?   ©rR   rp   r`   rb   rE   s        rA   Ú_evaluate_stringsz!StringEvaluator._evaluate_strings’   rJ   r@   c             ‹  ó@   K  — t          d| j        f|||dœ|¤Žƒ d{V —†S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        N©rp   r`   rb   )r   rv   ru   s        rA   Ú_aevaluate_stringsz"StringEvaluator._aevaluate_stringsª   s`   è è € õ, %ØØÔ"ð
ð "ØØð
ð 
ð ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   ra   c               óR   — |                       ||¬¦  «          | j        d|||dœ|¤ŽS )aú  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        ro   rx   r?   )rf   rv   ru   s        rA   Úevaluate_stringsz StringEvaluator.evaluate_stringsÉ   sM   € ð$ 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ%ˆtÔ%ð 
Ø!¨Y¸eð
ð 
ØGMð
ð 
ð 	
r@   c             ‹  ób   K  — |                       ||¬¦  «          | j        d|||dœ|¤Žƒ d{V —†S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        ro   rx   Nr?   )rf   ry   ru   s        rA   Úaevaluate_stringsz!StringEvaluator.aevaluate_stringsà   so   è è € ð$ 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ,TÔ,ð 
Ø!¨Y¸eð
ð 
ØGMð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   rh   rg   )
rp   rq   r`   rr   rb   rr   rE   r   rF   rs   )
rp   rX   r`   ra   rb   ra   rE   r   rF   rs   )r'   r(   r)   r*   ri   rm   rS   r   rv   ry   r{   r}   r?   r@   rA   rk   rk   „   s  € € € € € ð ð  ð ð'ð 'ð 'ñ „Xð'ð ðð ð ñ „Xðð ð
 04Ø+/ðð ð ð ð ñ „^ðð6 04Ø+/ð
ð 
ð 
ð 
ð 
ð 
ðF $(Ø#ð
ð 
ð 
ð 
ð 
ð 
ð6 $(Ø#ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
r@   rk   c                  ób   — e Zd ZdZedddœdd„¦   «         Zdddœdd„Zdddœdd„Zdddœdd„ZdS )ÚPairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nro   rp   rX   Úprediction_br`   ra   rb   rE   r   rF   rs   c               ó   — dS )á1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr?   ©rR   rp   r€   r`   rb   rE   s         rA   Ú_evaluate_string_pairsz.PairwiseStringEvaluator._evaluate_string_pairsû   rJ   r@   c             ‹  óB   K  — t          d| j        f||||dœ|¤Žƒ d{V —†S )á@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        N©rp   r€   r`   rb   )r   r„   rƒ   s         rA   Ú_aevaluate_string_pairsz/PairwiseStringEvaluator._aevaluate_string_pairs  sc   è è € õ( %ØØÔ'ð
ð "Ø%ØØð
ð 
ð ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   c               óT   — |                       ||¬¦  «          | j        d||||dœ|¤ŽS )r‚   ro   r‡   r?   )rf   r„   rƒ   s         rA   Úevaluate_string_pairsz-PairwiseStringEvaluator.evaluate_string_pairs/  sT   € ð( 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ*ˆtÔ*ð 
Ø!Ø%ØØð	
ð 
ð
 ð
ð 
ð 	
r@   c             ‹  ód   K  — |                       ||¬¦  «          | j        d||||dœ|¤Žƒ d{V —†S )r†   ro   r‡   Nr?   )rf   rˆ   rƒ   s         rA   Úaevaluate_string_pairsz.PairwiseStringEvaluator.aevaluate_string_pairsL  sv   è è € ð( 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ1TÔ1ð 
Ø!Ø%ØØð	
ð 
ð
 ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   )rp   rX   r€   rX   r`   ra   rb   ra   rE   r   rF   rs   )	r'   r(   r)   r*   r   r„   rˆ   rŠ   rŒ   r?   r@   rA   r   r   ø   s¾   € € € € € ØNÐNàð $(Ø#ðð ð ð ð ñ „^ðð4 $(Ø#ð
ð 
ð 
ð 
ð 
ð 
ðF $(Ø#ð
ð 
ð 
ð 
ð 
ð 
ðD $(Ø#ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
r@   r   c                  ór   — e Zd ZdZedd„¦   «         Zeddœdd„¦   «         Zddœdd„Zddœdd„Z	ddœdd„Z
dS )ÚAgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.rF   rN   c                ó   — dS )rV   Tr?   rQ   s    rA   rW   z'AgentTrajectoryEvaluator.requires_inputm  s	   € ð ˆtr@   N)r`   rp   rX   Úagent_trajectoryú!Sequence[Tuple[AgentAction, str]]rb   r`   ra   rE   r   rs   c               ó   — dS )á–  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr?   ©rR   rp   r   rb   r`   rE   s         rA   Ú_evaluate_agent_trajectoryz3AgentTrajectoryEvaluator._evaluate_agent_trajectoryr  rJ   r@   c             ‹  óB   K  — t          d| j        f||||dœ|¤Žƒ d{V —†S )á¥  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)rp   r   r`   rb   )r   r•   r”   s         rA   Ú_aevaluate_agent_trajectoryz4AgentTrajectoryEvaluator._aevaluate_agent_trajectory‰  sc   è è € õ* %ØØÔ+ð
ð "Ø-ØØð
ð 
ð ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   c               óT   — |                       ||¬¦  «          | j        d||||dœ|¤ŽS )r“   ro   ©rp   rb   r   r`   r?   )rf   r•   r”   s         rA   Úevaluate_agent_trajectoryz2AgentTrajectoryEvaluator.evaluate_agent_trajectory¨  sT   € ð* 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ.ˆtÔ.ð 
Ø!ØØ-Øð	
ð 
ð
 ð
ð 
ð 	
r@   c             ‹  ód   K  — |                       ||¬¦  «          | j        d||||dœ|¤Žƒ d{V —†S )r—   ro   rš   Nr?   )rf   r˜   r”   s         rA   Úaevaluate_agent_trajectoryz3AgentTrajectoryEvaluator.aevaluate_agent_trajectoryÆ  sv   è è € ð* 	×#Ò#¨i¸uÐ#ÑEÔEÐEØ5TÔ5ð 
Ø!ØØ-Øð	
ð 
ð
 ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
ð 	
r@   rg   )rp   rX   r   r‘   rb   rX   r`   ra   rE   r   rF   rs   )r'   r(   r)   r*   ri   rW   r   r•   r˜   r›   r   r?   r@   rA   rŽ   rŽ   j  sÒ   € € € € € Ø6Ð6àðð ð ñ „Xðð ð $(ðð ð ð ð ñ „^ðð8 $(ð
ð 
ð 
ð 
ð 
ð 
ðJ $(ð
ð 
ð 
ð 
ð 
ð 
ðH $(ð
ð 
ð 
ð 
ð 
ð 
ð 
ð 
r@   rŽ   )#r*   Ú
__future__r   ÚloggingÚabcr   r   Úenumr   Útypingr   r   r	   r
   r   Úwarningsr   Úlangchain_core.agentsr   Úlangchain_core.language_modelsr   Úlangchain_core.runnables.configr   Úlangchain.chains.baser   Ú	getLoggerr'   ÚloggerrX   r   rC   rM   rk   r   rŽ   r?   r@   rA   ú<module>rª      s  ðØ 9Ð 9à "Ð "Ð "Ð "Ð "Ð "à €€€Ø #Ð #Ð #Ð #Ð #Ð #Ð #Ð #Ø Ð Ð Ð Ð Ð Ø 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ð 8Ø Ð Ð Ð Ð Ð à -Ð -Ð -Ð -Ð -Ð -Ø <Ð <Ð <Ð <Ð <Ð <Ø ;Ð ;Ð ;Ð ;Ð ;Ð ;à 'Ð 'Ð 'Ð 'Ð 'Ð 'à	ˆÔ	˜8Ñ	$Ô	$€ð3Jð 3Jð 3Jð 3Jð 3JC˜ñ 3Jô 3Jð 3Jðl2ð 2ð 2ð 2ð 25ñ 2ô 2ð 2ð./ð ./ð ./ð ./ð ./ñ ./ô ./ð ./ðbq
ð q
ð q
ð q
ð q
n cñ q
ô q
ð q
ðho
ð o
ð o
ð o
ð o
˜n¨cñ o
ô o
ð o
ðdx
ð x
ð x
ð x
ð x
˜~¨sñ x
ô x
ð x
ð x
ð x
r@   