from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union

from langsmith.evaluation.evaluator import DynamicRunEvaluator
from langsmith.run_helpers import traceable
from langsmith.schemas import Example, Run

if TYPE_CHECKING:
    from langchain.evaluation.schema import StringEvaluator

    from langsmith.evaluation.evaluator import RunEvaluator


class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    """The predicted output string."""
    reference: Optional[Any]
    """The reference output, if any."""
    input: Optional[str]
    """The input string, if any."""


class LangChainStringEvaluator:
    """A class for wrapping a LangChain StringEvaluator.

    Requires the `langchain` package to be installed.

    Attributes:
        evaluator (StringEvaluator): The underlying StringEvaluator OR the name
            of the evaluator to load.

    Methods:
        as_run_evaluator() -> RunEvaluator:
            Convert the LangChainStringEvaluator to a RunEvaluator.

    Examples:
        Creating a simple LangChainStringEvaluator:

        >>> evaluator = LangChainStringEvaluator("exact_match")

        Converting a LangChainStringEvaluator to a RunEvaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_openai import ChatOpenAI
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatOpenAI(model="gpt-4o"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Customizing the LLM model used by the evaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_anthropic import ChatAnthropic
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Using the `evaluate` API with different evaluators:
        >>> def prepare_data(run: Run, example: Example):
        ...     # Convert the evaluation data into the format expected by the evaluator
        ...     # Only required for datasets with multiple input/output keys
        ...     return {
        ...         "prediction": run.outputs["prediction"],
        ...         "reference": example.outputs["answer"],
        ...         "input": str(example.inputs),
        ...     }
        >>> import re
        >>> from langchain_anthropic import ChatAnthropic
        >>> import langsmith
        >>> from langsmith.evaluation import LangChainStringEvaluator, evaluate
        >>> criteria_evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if it is correct"
        ...             " and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> embedding_evaluator = LangChainStringEvaluator("embedding_distance")
        >>> exact_match_evaluator = LangChainStringEvaluator("exact_match")
        >>> regex_match_evaluator = LangChainStringEvaluator(
        ...     "regex_match", config={"flags": re.IGNORECASE}, prepare_data=prepare_data
        ... )
        >>> scoring_evaluator = LangChainStringEvaluator(
        ...     "labeled_score_string",
        ...     config={
        ...         "criteria": {
        ...             "accuracy": "Score 1: Completely inaccurate\nScore 5: Somewhat accurate\nScore 10: Completely accurate"
        ...         },
        ...         "normalize_by": 10,
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> string_distance_evaluator = LangChainStringEvaluator(
        ...     "string_distance",
        ...     config={"distance_metric": "levenshtein"},
        ...     prepare_data=prepare_data,
        ... )
        >>> from langsmith import Client
        >>> client = Client()
        >>> results = evaluate(
        ...     lambda inputs: {"prediction": "foo"},
        ...     data=client.list_examples(dataset_name="Evaluate Examples", limit=1),
        ...     evaluators=[
        ...         embedding_evaluator,
        ...         criteria_evaluator,
        ...         exact_match_evaluator,
        ...         regex_match_evaluator,
        ...         scoring_evaluator,
        ...         string_distance_evaluator,
        ...     ],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    """

    def __init__(
        self,
        evaluator: Union[StringEvaluator, str],
        *,
        config: Optional[dict] = None,
        prepare_data: Optional[
            Callable[[Run, Optional[Example]], SingleEvaluatorInput]
        ] = None,
    ):
        """Initialize a LangChainStringEvaluator.

        See: https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.StringEvaluator.html#langchain-evaluation-schema-stringevaluator

        Args:
            evaluator (StringEvaluator): The underlying StringEvaluator.
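
        Example:
            A minimal sketch: loading a built-in evaluator by its registry
            name; this assumes the `langchain` package is installed.

            >>> evaluator = LangChainStringEvaluator("exact_match")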
        """
        from langchain.evaluation.schema import StringEvaluator  # noqa: F811

        if isinstance(evaluator, StringEvaluator):
            self.evaluator = evaluator
        elif isinstance(evaluator, str):
            from langchain.evaluation import load_evaluator

            self.evaluator = load_evaluator(evaluator, **(config or {}))
        else:
            raise NotImplementedError(f"Unsupported evaluator type: {type(evaluator)}")
        self._prepare_data = prepare_data

    def as_run_evaluator(
        self,
    ) -> RunEvaluator:
        """Convert the LangChainStringEvaluator to a RunEvaluator.

        This is the object used in the LangSmith `evaluate` API.

        Returns:
            RunEvaluator: The converted RunEvaluator.
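
        Example:
            A minimal sketch, mirroring the class-level examples above:

            >>> evaluator = LangChainStringEvaluator("exact_match")
            >>> evaluator.as_run_evaluator()  # doctest: +ELLIPSIS
            <DynamicRunEvaluator ...>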
        """
        input_str = (
            "\n       \"input\": example.inputs['input'],"
            if self.evaluator.requires_input
            else ""
        )
        reference_str = (
            "\n       \"reference\": example.outputs['expected']"
            if self.evaluator.requires_reference
            else ""
        )
        # Template shown in error messages when the run/example shape doesn't
        # match what the wrapped evaluator expects.
        customization_error_str = f"""
def prepare_data(run, example):
    return {{
        "prediction": run.outputs['my_output'],{input_str}{reference_str}
    }}
evaluator = LangChainStringEvaluator(..., prepare_data=prepare_data)
"""

        @traceable
        def prepare_evaluator_inputs(
            run: Run, example: Optional[Example] = None
        ) -> SingleEvaluatorInput:
            if run.outputs and len(run.outputs) > 1:
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single"
                    " prediction key. Please ensure that the run has a single"
                    f" output. Or initialize with a prepare_data:\n{customization_error_str}"
                )
            if (
                self.evaluator.requires_reference
                and example
                and example.outputs
                and len(example.outputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single"
                    " reference key. Please ensure that the example has a single"
                    f" output. Or create a custom evaluator yourself:\n{customization_error_str}"
                )
            if (
                self.evaluator.requires_input
                and example
                and example.inputs
                and len(example.inputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single"
                    " input key. Please ensure that the example has a single"
                    f" input. Or initialize with a prepare_data:\n{customization_error_str}"
                )
            # Single-key runs/examples: take the lone value from each mapping.
            return SingleEvaluatorInput(
                prediction=next(iter(run.outputs.values())),
                reference=(
                    next(iter(example.outputs.values()))
                    if self.evaluator.requires_reference and example and example.outputs
                    else None
                ),
                input=(
                    next(iter(example.inputs.values()))
                    if self.evaluator.requires_input and example and example.inputs
                    else None
                ),
            )

        @traceable(name=self.evaluator.evaluation_name)
        def evaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = self.evaluator.evaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        @traceable(name=self.evaluator.evaluation_name)
        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = await self.evaluator.aevaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        return DynamicRunEvaluator(evaluate, aevaluate)