
    NgU)             	         d Z ddlmZ ddlZddlZddlmZ ddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlZddlZddlZddlZddlmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(Z(ddl(m)Z* ddl(m+Z, dd	l(m-Z. dd
l(m/Z/ ddl(m0Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> ddl?m@Z@ erddlAZBddlCmDZD eBjE        ZEneZE ejF        eG          ZHeeIgeIf         ZJe$eKejL        ee/jM                 e/jN        f         ZOe$e;ee/jP        e e/jM                 ge$e9e:f         f         ede$eIe:e9f         f         f         ZQe$ee/jP        e e/jM                 gee$e9e:f                  f         f         ZR	 	 	 	 	 	 	 	 	 	 	 ddd-ZS	 	 	 	 	 	 	 ddd1ZT G d2 d3e'          ZU G d4 d,          ZVee!e/jP                 e e/jM                 ge$e$e6eIf         ee$e6eIf                  f         f         ZW	 	 	 	 	 	 	 ddd;ZX G d< d:          ZYddAZZddCZ[	 	 	 	 	 	 	 	 	 	 	 dddDZ\ddGZ]ddLZ^	 dddNZ_ddQZ` e#dR          ZaddTZb e#dUdVW          Zc G dX dV          Zd G dY dZed          Zedd]Zfdd`Zg G da dbe'          ZhddiZiddkZjddnZkddrZlddsZmddvZnddzZo	 	 dddZp	 	 dddZq e	jr        d          dd            ZsddZtdS )zV2 Evaluation Interface.    )annotationsN)copy_context)TYPE_CHECKINGAny	AwaitableCallableDefaultDictDict	GeneratorIterableIteratorListOptionalSequenceTupleTypeVarUnioncast)	TypedDict)env)run_helpers)	run_trees)schemas)utils)
_warn_once)
SUMMARY_EVALUATOR_TComparisonEvaluationResultDynamicComparisonRunEvaluatorDynamicRunEvaluatorEvaluationResultEvaluationResultsRunEvaluator_normalize_summary_evaluatorcomparison_evaluatorrun_evaluator)LangChainStringEvaluatorRunnable.   TdataDATA_T
evaluatorsOptional[Sequence[EVALUATOR_T]]summary_evaluators'Optional[Sequence[SUMMARY_EVALUATOR_T]]metadataOptional[dict]experiment_prefixOptional[str]descriptionmax_concurrencyOptional[int]num_repetitionsintclientOptional[langsmith.Client]blockingbool
experiment6Optional[Union[schemas.TracerSession, str, uuid.UUID]]upload_resultstargetUnion[TARGET_T, Runnable]returnExperimentResultsc                   |st          d           t          |           r#t          j        |           rt	          d          |r|rt	          d| d|           t          | |||||||||	|
||          S )a?  Evaluate a target system or function on a given dataset.

    Args:
        target (TARGET_T): The target system or function to evaluate.
        data (DATA_T): The dataset to evaluate on. Can be a dataset name, a list of
            examples, or a generator of examples.
        evaluators (Optional[Sequence[EVALUATOR_T]]): A list of evaluators to run
            on each example. Defaults to None.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary
            evaluators to run on the entire dataset. Defaults to None.
        metadata (Optional[dict]): Metadata to attach to the experiment.
            Defaults to None.
        experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
            Defaults to None.
        description (Optional[str]): A free-form text description for the experiment.
        max_concurrency (Optional[int]): The maximum number of concurrent
            evaluations to run. Defaults to None (max number of workers).
        client (Optional[langsmith.Client]): The LangSmith client to use.
            Defaults to None.
        blocking (bool): Whether to block until the evaluation is complete.
            Defaults to True.
        num_repetitions (int): The number of times to run the evaluation.
            Each item in the dataset will be run and evaluated this many times.
            Defaults to 1.
        experiment (Optional[schemas.TracerSession]): An existing experiment to
            extend. If provided, experiment_prefix is ignored. For advanced
            usage only.

    Returns:
        ExperimentResults: The results of the evaluation.

    Examples:
        Prepare the dataset:

        >>> from typing import Sequence
        >>> from langsmith import Client
        >>> from langsmith.evaluation import evaluate
        >>> from langsmith.schemas import Example, Run
        >>> client = Client()
        >>> dataset = client.clone_public_dataset(
        ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
        ... )
        >>> dataset_name = "Evaluate Examples"

        Basic usage:

        >>> def accuracy(run: Run, example: Example):
        ...     # Row-level evaluator for accuracy.
        ...     pred = run.outputs["output"]
        ...     expected = example.outputs["answer"]
        ...     return {"score": expected.lower() == pred.lower()}
        >>> def precision(runs: Sequence[Run], examples: Sequence[Example]):
        ...     # Experiment-level evaluator for precision.
        ...     # TP / (TP + FP)
        ...     predictions = [run.outputs["output"].lower() for run in runs]
        ...     expected = [example.outputs["answer"].lower() for example in examples]
        ...     # yes and no are the only possible answers
        ...     tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
        ...     fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
        ...     return {"score": tp / (tp + fp)}
        >>> def predict(inputs: dict) -> dict:
        ...     # This can be any function or just an API call to your app.
        ...     return {"output": "Yes"}
        >>> results = evaluate(
        ...     predict,
        ...     data=dataset_name,
        ...     evaluators=[accuracy],
        ...     summary_evaluators=[precision],
        ...     experiment_prefix="My Experiment",
        ...     description="Evaluating the accuracy of a simple prediction model.",
        ...     metadata={
        ...         "my-prompt-version": "abcd-1234",
        ...     },
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Evaluating over only a subset of the examples

        >>> experiment_name = results.experiment_name
        >>> examples = client.list_examples(dataset_name=dataset_name, limit=5)
        >>> results = evaluate(
        ...     predict,
        ...     data=examples,
        ...     evaluators=[accuracy],
        ...     summary_evaluators=[precision],
        ...     experiment_prefix="My Experiment",
        ...     description="Just testing a subset synchronously.",
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Streaming each prediction to more easily + eagerly debug.

        >>> results = evaluate(
        ...     predict,
        ...     data=dataset_name,
        ...     evaluators=[accuracy],
        ...     summary_evaluators=[precision],
        ...     description="I don't even have to block!",
        ...     blocking=False,
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
        >>> for i, result in enumerate(results):  # doctest: +ELLIPSIS
        ...     pass

        Using the `evaluate` API with an off-the-shelf LangChain evaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_openai import ChatOpenAI
        >>> def prepare_criteria_data(run: Run, example: Example):
        ...     return {
        ...         "prediction": run.outputs["output"],
        ...         "reference": example.outputs["answer"],
        ...         "input": str(example.inputs),
        ...     }
        >>> results = evaluate(
        ...     predict,
        ...     data=dataset_name,
        ...     evaluators=[
        ...         accuracy,
        ...         LangChainStringEvaluator("embedding_distance"),
        ...         LangChainStringEvaluator(
        ...             "labeled_criteria",
        ...             config={
        ...                 "criteria": {
        ...                     "usefulness": "The prediction is useful if it is correct"
        ...                     " and/or asks a useful followup question."
        ...                 },
        ...                 "llm": ChatOpenAI(model="gpt-4o"),
        ...             },
        ...             prepare_data=prepare_criteria_data,
        ...         ),
        ...     ],
        ...     description="Evaluating with off-the-shelf LangChain evaluators.",
        ...     summary_evaluators=[precision],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Evaluating a LangChain object:

        >>> from langchain_core.runnables import chain as as_runnable
        >>> @as_runnable
        ... def nested_predict(inputs):
        ...     return {"output": "Yes"}
        >>> @as_runnable
        ... def lc_predict(inputs):
        ...     return nested_predict.invoke(inputs)
        >>> results = evaluate(
        ...     lc_predict.invoke,
        ...     data=dataset_name,
        ...     evaluators=[accuracy],
        ...     description="This time we're evaluating a LangChain object.",
        ...     summary_evaluators=[precision],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    z&'upload_results' parameter is in beta.zAsync functions are not supported by `evaluate`. Please use `aevaluate` instead:

from langsmith import aevaluate

await aevaluate(
    async_target_function,
    data=data,
    evaluators=evaluators,
    # ... other parameters
)zeExpected at most one of 'experiment' or 'experiment_prefix', but both were provided. Got: experiment=z, experiment_prefix=)r*   r,   r.   r0   r2   r4   r5   r7   r9   r;   r=   r?   )r   callablerhis_async
ValueError	_evaluate)r@   r*   r,   r.   r0   r2   r4   r5   r7   r9   r;   r=   r?   s                X/var/www/html/ai-engine/env/lib/python3.11/site-packages/langsmith/evaluation/_runner.pyevaluaterK   ]   s    V  =;<<< 
BK// 


 

 
	
  
' 
S)S S?PS S
 
 	

 -+''%       Fload_nested,Union[str, uuid.UUID, schemas.TracerSession]c                  |pt          j        d          }t          | t          j                  r| nt          | |          }t          | ||          }	t          ||          fd|	D             }
t          |	|
|||||||	  	        S )a  Evaluate existing experiment runs.

    Args:
        experiment (Union[str, uuid.UUID]): The identifier of the experiment to evaluate.
        data (DATA_T): The data to use for evaluation.
        evaluators (Optional[Sequence[EVALUATOR_T]]): Optional sequence of evaluators to use for individual run evaluation.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): Optional sequence of evaluators
            to apply over the entire dataset.
        metadata (Optional[dict]): Optional metadata to include in the evaluation results.
        max_concurrency (Optional[int]): Optional maximum number of concurrent evaluations.
        client (Optional[langsmith.Client]): Optional Langsmith client to use for evaluation.
        load_nested: Whether to load all child runs for the experiment.
            Default is to only load the top-level root runs.
        blocking (bool): Whether to block until evaluation is complete.

    Returns:
        ExperimentResults: The evaluation results.

    Environment:
        - LANGSMITH_TEST_CACHE: If set, API calls will be cached to disk to save time and
            cost during testing. Recommended to commit the cache files to your repository
            for faster CI/CD runs.
            Requires the 'langsmith[vcr]' package to be installed.

    Examples:
        >>> from langsmith.evaluation import evaluate, evaluate_existing
        >>> dataset_name = "Evaluate Examples"
        >>> def predict(inputs: dict) -> dict:
        ...     # This can be any function or just an API call to your app.
        ...     return {"output": "Yes"}
        >>> # First run inference on the dataset
        ... results = evaluate(
        ...     predict,
        ...     data=dataset_name,
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
        >>> # Then apply evaluators to the experiment
        ... def accuracy(run: Run, example: Example):
        ...     # Row-level evaluator for accuracy.
        ...     pred = run.outputs["output"]
        ...     expected = example.outputs["answer"]
        ...     return {"score": expected.lower() == pred.lower()}
        >>> def precision(runs: Sequence[Run], examples: Sequence[Example]):
        ...     # Experiment-level evaluator for precision.
        ...     # TP / (TP + FP)
        ...     predictions = [run.outputs["output"].lower() for run in runs]
        ...     expected = [example.outputs["answer"].lower() for example in examples]
        ...     # yes and no are the only possible answers
        ...     tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
        ...     fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
        ...     return {"score": tp / (tp + fp)}
        >>> experiment_name = (
        ...     results.experiment_name
        ... )  # Can use the returned experiment name
        >>> experiment_name = "My Experiment:64e6e91"  # Or manually specify
        >>> results = evaluate_existing(
        ...     experiment_name,
        ...     summary_evaluators=[precision],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    )i N  i_ )
timeout_msrM   c                Z    g | ]'}t          t          j        |j                           (S  )r   uuidUUIDreference_example_id).0rundata_maps     rJ   
<listcomp>z%evaluate_existing.<locals>.<listcomp>}  s-    PPPCHT$)S%=>>?PPPrL   )r*   r,   r.   r0   r5   r9   r;   r=   )	rtget_cached_client
isinstancer   TracerSession_load_experiment_load_traces_load_examples_maprI   )r=   r,   r.   r0   r5   r9   rM   r;   projectrunsr*   rY   s              @rJ   evaluate_existingrd   -  s    P Hr+7GHHHF j'"788	2

j&11 
 
FDDDD!&'22HPPPP4PPPD-'
 
 
 
rL   c                  .    e Zd ZU ded<   ded<   ded<   dS )ExperimentResultRowschemas.RunrX   schemas.Exampleexampler!   evaluation_resultsN__name__
__module____qualname____annotations__rS   rL   rJ   rf   rf     s6         ))))))rL   rf   c                  p    e Zd ZdZdddZedd
            ZddZddZd dZ		 d!d"dZ
ddZddZddZdS )#rC   a  Represents the results of an evaluate() call.

    This class provides an iterator interface to iterate over the experiment results
    as they become available. It also provides methods to access the experiment name,
    the number of results, and to wait for the results to be processed.

    Methods:
        experiment_name() -> str: Returns the name of the experiment.
        wait() -> None: Waits for the experiment data to be processed.
    Texperiment_manager_ExperimentManagerr;   r<   c                0   || _         g | _        t          j                    | _        t          j                    | _        |s:t          j        | j	                  | _
        | j
                                         d S d | _
        | 	                                 d S )Nr@   )_manager_resultsqueueQueue_queue	threadingEvent_processing_completeThread_process_data_threadstart)selfrq   r;   s      rJ   __init__zExperimentResults.__init__  s    *358=$-O$5$5! 	!7@7G)8 8 8DL L     DL     rL   rB   strc                    | j         j        S N)ru   experiment_namer   s    rJ   r   z!ExperimentResults.experiment_name  s    },,rL   Iterator[ExperimentResultRow]c              #    K   d}| j                                         r1| j                                        r|t	          | j                  k     r	 |t	          | j                  k     r| j        |         V  |dz  }n| j                            dd           n# t          j        $ r Y w xY w| j                                         y| j                                        |t	          | j                  k     d S d S )Nr   r)   Tg?)blocktimeout)	r|   is_setry   emptylenrv   getrw   Empty)r   ixs     rJ   __iter__zExperimentResults.__iter__  s     )0022	;$$&&	 C&&&&DM****-++++!GBBKOO$O<<<;    )0022	;$$&&	 C&&&&&&&&s   A	B B,+B,Nonec                J   t                      }| j                                        } ||          D ]6}| j                            |           | j                            |           7| j                                        }|| _        | j	        
                                 d S r   )
_load_tqdmru   get_resultsry   putrv   appendget_summary_scores_summary_resultsr|   set)r   tqdmresultsitemsummary_scoress        rJ   r~   zExperimentResults._process_data  s    ||-++--DMM 	' 	'DKOOD!!!M  &&&&99;; .!%%'''''rL   r8   c                *    t          | j                  S r   )r   rv   r   s    rJ   __len__zExperimentResults.__len__  s    4=!!!rL   r   Nr   r6   end	DataFramec                0    t          | j        ||          S )Nr   r   )
_to_pandasrv   )r   r   r   s      rJ   	to_pandaszExperimentResults.to_pandas  s     $-u#>>>>rL   c                    dd l }| j        rB|j                            d          r(|                                 }|                                S |                                 S )Nr   pandas)importlib.utilrv   util	find_specr   _repr_html___repr__)r   	importlibdfs      rJ   r   zExperimentResults._repr_html_  s\    = 	#Y^55h?? 	#!!B>>###==??"rL   c                    d| j          dS )Nz<ExperimentResults >)r   r   s    rJ   r   zExperimentResults.__repr__  s    <T%9<<<<rL   c                J    | j         r| j                                          dS dS )zWait for the evaluation runner to complete.

        This method blocks the current thread until the evaluation runner has
        finished its execution.
        N)r   joinr   s    rJ   waitzExperimentResults.wait  s2     < 	 L	  	 rL   )T)rq   rr   r;   r<   rB   r   )rB   r   rB   r   )rB   r8   r   N)r   r6   r   r6   rB   r   )rl   rm   rn   __doc__r   propertyr   r   r~   r   r   r   r   r   rS   rL   rJ   rC   rC     s        	 	! ! ! ! ! - - - X-    
( 
( 
( 
(" " " " >B? ? ? ? ?
# # # #= = = =           rL      !Sequence[COMPARATIVE_EVALUATOR_T]randomize_orderexperiments3Tuple[Union[str, uuid.UUID], Union[str, uuid.UUID]]ComparativeExperimentResultsc	          
     D  & t          |           dk     rt          d          |st          d          |dk     rt          d          pt          j                    fd| D             }	d |	D             }
t          t	          |
                    dk    st          d	          d
 |	D             }|Sd |	D             }d                    |          dz   t          t          j                    j	        dd                   z   }n3|dz   t          t          j                    j	        dd                   z   }t          j                    }
                    |||||          &t          t          t          t          j        t          j        f         t!          |	                    &           fd| D             }d}|D ]}d |D             }||}||z  }|t#          |          ng }d |D             }d}i }t%          dt          |          |          D ]]}||||z            }                    |	d         j        |	d         j                            d          |          D ]}|||j        <   ^t1          j        t"                    }|D ]H}|D ]C}|j        |v r8|t          t          j        |j                                               |           DId |pg D             }i }d'&fd#}t;                      }t=          j        |pd$          5 } g }! ||                                           D ]\  }"}d%|i||"<   |D ]d}#|dk    r5| !                    ||||"         |#|           }$|!                    |$           = ||||"         |#|           }%|%||"         d&|%j"         <   e|!r@tG          j$        |!           |!D ])}$|$%                                }%|%||"         d&|%j"         <   *	 ddd           n# 1 swxY w Y   tM          ||          S )(a!  Evaluate existing experiment runs against each other.

    This lets you use pairwise preference scoring to generate more
    reliable feedback in your experiments.

    Args:
        experiments (Tuple[Union[str, uuid.UUID], Union[str, uuid.UUID]]):
            The identifiers of the experiments to compare.
        evaluators (Sequence[COMPARATIVE_EVALUATOR_T]):
            A list of evaluators to run on each example.
        experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
            Defaults to None.
        description (Optional[str]): A free-form text description for the experiment.
        max_concurrency (int): The maximum number of concurrent evaluations to run.
            Defaults to 5.
        client (Optional[langsmith.Client]): The LangSmith client to use.
            Defaults to None.
        metadata (Optional[dict]): Metadata to attach to the experiment.
            Defaults to None.
        load_nested (bool): Whether to load all child runs for the experiment.
            Default is to only load the top-level root runs.
        randomize_order (bool): Whether to randomize the order of the outputs for each evaluation.
            Default is False.

    Returns:
        ComparativeExperimentResults: The results of the comparative evaluation.

    Examples:
        Suppose you want to compare two prompts to see which one is more effective.
        You would first prepare your dataset:

        >>> from typing import Sequence
        >>> from langsmith import Client
        >>> from langsmith.evaluation import evaluate
        >>> from langsmith.schemas import Example, Run
        >>> client = Client()
        >>> dataset = client.clone_public_dataset(
        ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
        ... )
        >>> dataset_name = "Evaluate Examples"

        Then you would run your different prompts:
        >>> import functools
        >>> import openai
        >>> from langsmith.evaluation import evaluate
        >>> from langsmith.wrappers import wrap_openai
        >>> oai_client = openai.Client()
        >>> wrapped_client = wrap_openai(oai_client)
        >>> prompt_1 = "You are a helpful assistant."
        >>> prompt_2 = "You are an exceedingly helpful assistant."
        >>> def predict(inputs: dict, prompt: str) -> dict:
        ...     completion = wrapped_client.chat.completions.create(
        ...         model="gpt-3.5-turbo",
        ...         messages=[
        ...             {"role": "system", "content": prompt},
        ...             {
        ...                 "role": "user",
        ...                 "content": f"Context: {inputs['context']}"
        ...                 f"\n\ninputs['question']",
        ...             },
        ...         ],
        ...     )
        ...     return {"output": completion.choices[0].message.content}
        >>> results_1 = evaluate(
        ...     functools.partial(predict, prompt=prompt_1),
        ...     data=dataset_name,
        ...     description="Evaluating our basic system prompt.",
        ...     blocking=False,  # Run these experiments in parallel
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
        >>> results_2 = evaluate(
        ...     functools.partial(predict, prompt=prompt_2),
        ...     data=dataset_name,
        ...     description="Evaluating our advanced system prompt.",
        ...     blocking=False,
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
        >>> results_1.wait()
        >>> results_2.wait()
        >>> import time
        >>> time.sleep(10)  # Wait for the traces to be fully processed

            Finally, you would compare the two prompts directly:
        >>> import json
        >>> from langsmith.evaluation import evaluate_comparative
        >>> def score_preferences(runs: list, example: schemas.Example):
        ...     assert len(runs) == 2  # Comparing 2 systems
        ...     assert isinstance(example, schemas.Example)
        ...     assert all(run.reference_example_id == example.id for run in runs)
        ...     pred_a = runs[0].outputs["output"]
        ...     pred_b = runs[1].outputs["output"]
        ...     ground_truth = example.outputs["answer"]
        ...     tools = [
        ...         {
        ...             "type": "function",
        ...             "function": {
        ...                 "name": "rank_preferences",
        ...                 "description": "Saves the prefered response ('A' or 'B')",
        ...                 "parameters": {
        ...                     "type": "object",
        ...                     "properties": {
        ...                         "reasoning": {
        ...                             "type": "string",
        ...                             "description": "The reasoning behind the choice.",
        ...                         },
        ...                         "preferred_option": {
        ...                             "type": "string",
        ...                             "enum": ["A", "B"],
        ...                             "description": "The preferred option, either 'A' or 'B'",
        ...                         },
        ...                     },
        ...                     "required": ["preferred_option"],
        ...                 },
        ...             },
        ...         }
        ...     ]
        ...     completion = openai.Client().chat.completions.create(
        ...         model="gpt-3.5-turbo",
        ...         messages=[
        ...             {"role": "system", "content": "Select the better response."},
        ...             {
        ...                 "role": "user",
        ...                 "content": f"Option A: {pred_a}"
        ...                 f"\n\nOption B: {pred_b}"
        ...                 f"\n\nGround Truth: {ground_truth}",
        ...             },
        ...         ],
        ...         tools=tools,
        ...         tool_choice={
        ...             "type": "function",
        ...             "function": {"name": "rank_preferences"},
        ...         },
        ...     )
        ...     tool_args = completion.choices[0].message.tool_calls[0].function.arguments
        ...     loaded_args = json.loads(tool_args)
        ...     preference = loaded_args["preferred_option"]
        ...     comment = loaded_args["reasoning"]
        ...     if preference == "A":
        ...         return {
        ...             "key": "ranked_preference",
        ...             "scores": {runs[0].id: 1, runs[1].id: 0},
        ...             "comment": comment,
        ...         }
        ...     else:
        ...         return {
        ...             "key": "ranked_preference",
        ...             "scores": {runs[0].id: 0, runs[1].id: 1},
        ...             "comment": comment,
        ...         }
        >>> def score_length_difference(runs: list, example: schemas.Example):
        ...     # Just return whichever response is longer.
        ...     # Just an example, not actually useful in real life.
        ...     assert len(runs) == 2  # Comparing 2 systems
        ...     assert isinstance(example, schemas.Example)
        ...     assert all(run.reference_example_id == example.id for run in runs)
        ...     pred_a = runs[0].outputs["output"]
        ...     pred_b = runs[1].outputs["output"]
        ...     if len(pred_a) > len(pred_b):
        ...         return {
        ...             "key": "length_difference",
        ...             "scores": {runs[0].id: 1, runs[1].id: 0},
        ...         }
        ...     else:
        ...         return {
        ...             "key": "length_difference",
        ...             "scores": {runs[0].id: 0, runs[1].id: 1},
        ...         }
        >>> results = evaluate_comparative(
        ...     [results_1.experiment_name, results_2.experiment_name],
        ...     evaluators=[score_preferences, score_length_difference],
        ...     client=client,
        ... )  # doctest: +ELLIPSIS
        View the pairwise evaluation results at:...
        >>> eval_results = list(results)
        >>> assert len(eval_results) >= 10  # doctest: +SKIP
        >>> assert all(
        ...     "feedback.ranked_preference" in r["evaluation_results"]
        ...     for r in eval_results
        ... )  # doctest: +SKIP
        >>> assert all(
        ...     "feedback.length_difference" in r["evaluation_results"]
        ...     for r in eval_results
        ... )  # doctest: +SKIP
       z7Comparative evaluation requires at least 2 experiments.z>At least one evaluator is required for comparative evaluation.r   z+max_concurrency must be a positive integer.c                0    g | ]}t          |          S rS   )r_   )rW   r=   r9   s     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s$    SSS V44SSSrL   c                6    g | ]}t          |j                  S rS   )r   reference_dataset_idrW   ps     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s#    CCCQS/00CCCrL   r)   z5All experiments must have the same reference dataset.c                    g | ]	}|j         
S rS   idr   s     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s    ---qad---rL   Nc                *    g | ]}|j         	|j         S r   namer   s     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s!    KKKq8JAF8J8J8JrL   z vs. -      )r   r4   r0   r   c                4    g | ]}t          |           S )rQ   )r`   )rW   r=   r9   rM   s     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s8        	Z[AAA  rL   c                    h | ]	}|j         
S rS   )rV   )rW   rX   s     rJ   	<setcomp>z'evaluate_comparative.<locals>.<setcomp>  s    III33IIIrL   c                    g | ]}||S r   rS   )rW   eids     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s    JJJ3#/3///rL   c   dataset_version)
dataset_idas_ofexample_idsc                ,    g | ]}t          |          S rS   )r$   )rW   	evaluators     rJ   rZ   z(evaluate_comparative.<locals>.<listcomp>  s!    UUUy'	22UUUrL   	runs_listlist[schemas.Run]ri   rh   
comparatorr   executorcf.ExecutorrB   r   c                T   t          j                    }rt          j        |            t	          j        d	          5  |                    | |          	t          d          	 d d d            n# 1 swxY w Y   t          j	        t                    rfdj        D             nj	        pi }j                                        D ]V\  }}|                    	j        |j        ||                    t          |                    
j        j        |           WS )Nr,   )project_namer9   z&Client is required to submit feedback.c                :    i | ]}t          |          j        S rS   )r   comment)rW   ridresults     rJ   
<dictcomp>zNevaluate_comparative.<locals>.evaluate_and_submit_feedback.<locals>.<dictcomp>  s#    ???#SXXv~???rL   )run_idkeyscorer   comparative_experiment_idsource_run_idfeedback_group_id)rT   uuid4randomshufflerF   tracing_contextcompare_runsrH   r]   r   r   scoresitemssubmitcreate_feedbackr   r   r   r   )r   ri   r   r   r   commentsr   r   r   r9   comparative_experimentr   s           @rJ   evaluate_and_submit_feedbackz:evaluate_comparative.<locals>.evaluate_and_submit_feedback  s    !JLL 	&N9%%%\&III 	K 	K,,Y@@F~ !IJJJ 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K &.#..(????????.&B 	
 $]0022 
	 
	MFEOO&J S[[11*@*C$2"3  	 	 	 	 s   (A77A;>A;max_workersrc   	feedback.)
r   r   ri   rh   r   r   r   r   rB   r   )'r   rH   r[   r\   r   r   r   rT   r   hexcreate_comparative_experiment#_print_comparative_experiment_startr   r   r   TracerSessionResulttuplelistrangelist_examplesr   r0   r   r   collectionsdefaultdictrV   rU   r   r   ls_utilsContextThreadPoolExecutorr   r   r   cfr   r   r   )'r   r,   r2   r4   r5   r9   r0   rM   r   projectsref_datasets_experiment_idsexperiment_namesr   r   rc   examples_intersectionr   example_ids_setexample_ids_nullabler   
batch_sizer*   iexample_ids_batche	runs_dictrX   comparatorsr   r   r   r   futures
example_idr   futurer   r   s'        ` ``                             @rJ   evaluate_comparativer    s   H ;!RSSS 
L
 
 	
 FGGG-r+--F TSSS{SSSHCC(CCCMs=!!""a''PQQQ--H---N KKHKKKLL)**S03tz||7G7K3L3LL 	 ,c1C
8H!8L4M4MM $
#AA"$ B   ('-w/JJK(OO	
 	
 	      %  D
 ! 5 5	IIyIII ($3!!!_4!!'<'H"###b  KJ"6JJJK JD1c+&&
33  'A
N(:;%%{71+&**+<==) & 
 
 	 	A
 DJJ	 5@4KD4Q4QI Q Q	 	Q 	QC'4//$ty#*BCCDKKCPPP	Q VUJDTRTUUUKG       > <<D		+#(q
 
 
 K	%)T)//*;*;%<%< 	K 	K!J	#)9"5GJ) K K
"Q&&%__4!Z("  F NN6****99!4
#3Z F EKGJ'(@FJ(@(@AA K   % K KF#]]__FDJGJ'(@FJ(@(@AA+	K	K K K K K K K K K K K K K K K6 (666s   $CPPPc                  *    e Zd ZdZ	 d
ddZd Zd	 ZdS )r   a  Represents the results of an evaluate_comparative() call.

    This class provides an iterator interface to iterate over the experiment results
    as they become available. It also provides methods to access the experiment name,
    the number of results, and to wait for the results to be processed.

    Methods:
        experiment_name() -> str: Returns the name of the experiment.
        wait() -> None: Waits for the experiment data to be processed.
    Nr   dictexamples*Optional[Dict[uuid.UUID, schemas.Example]]c                "    || _         || _        d S r   )rv   	_examples)r   r   r  s      rJ   r   z%ComparativeExperimentResults.__init__N  s    
  !rL   c                    | j         |         S )z0Return the result associated with the given key.)rv   )r   r   s     rJ   __getitem__z(ComparativeExperimentResults.__getitem__V  s    }S!!rL   c              #  ~   K   | j                                         D ] \  }}| j        r| j        |         nd |dV  !d S )N)ri   rj   )rv   r   r!  )r   r   values      rJ   r   z%ComparativeExperimentResults.__iter__Z  sh      ---// 	 	JC26.J4>#..d&+     	 	rL   r   )r   r  r  r  )rl   rm   rn   r   r   r#  r   rS   rL   rJ   r   r   B  s\        	 	 @D" " " " "" " "    rL   3Tuple[schemas.TracerSession, schemas.TracerSession]r   schemas.ComparativeExperimentr   c                @   | d         j         p| d         j         }|r|                    d          d         }|j        }|                    d          d         }| d| dd                    d | D                        d	|j         }t          d
| d           d S d S )Nr   r)   ?/projects/p/
/datasets//compare?selectedSessions=z%2Cc                6    g | ]}t          |j                  S rS   )r   r   rW   r  s     rJ   rZ   z7_print_comparative_experiment_start.<locals>.<listcomp>p  s     +K+K+K!CII+K+K+KrL   z&comparativeExperiment=z)View the pairwise evaluation results at:


)urlsplitr   r   r   print)r   r   r0  project_urlr   base_urlcomparison_urls          rJ   r   r   e  s     a.

2A 2C
 
iinnQ'+@
$$^44Q7 B B: B B %

+K+K{+K+K+K L LB B&<&?B B 	
 	MMMM	
 	
 	
 	
 	

 
rL   0Union[TARGET_T, Iterable[schemas.Run], Runnable]c                >    t          |           pt          |           S r   )rE   _is_langchain_runnablert   s    rJ   _is_callabler9  x  s    F=5f===rL   c          
        |	pt          j                    }	t          |           rd n$t          t          t
          j                 |           }t          |||	          \  }}t          ||	||p|||||          	                                }t          j        d           }|rt          j        |          |j         dz  nd }t          j        ||	j        g          5  t          |           r*|                    t          t$          |           |          }|r|                    ||          }|r|                    |          }t+          ||
          }|cd d d            S # 1 swxY w Y   d S )N)r9   r0   r=   r4   r7   rc   r?   z.yaml)ignore_hostsr5   )r;   )r[   r\   r9  r   r   r   Run_resolve_experimentrr   r   r  get_cache_dirpathlibPathr   with_optional_cacheapi_urlwith_predictionsTARGET_Twith_evaluatorswith_summary_evaluatorsrC   )r@   r*   r,   r.   r0   r2   r4   r5   r7   r9   r;   r=   r?   rc   experiment_manager	cache_dir
cache_pathr   s                      rJ   rI   rI   |  s   " -r+--F''P44T(7;2G-P-PD+ K !3"3'%   egg  &t,,IBKUYW%7">">">>>QU  
	%j?O	P	P	P   	..Xv&& /  G  	--O .  G  	J556HIIG#Gh???!                 s   A<E''E+.E+r%  r   c                R    	 t          j        |            dS # t          $ r Y dS w xY w)NTF)rT   rU   rH   )r%  s    rJ   _is_uuidrM    s>    	%t   uus    
&&rb   Union[str, uuid.UUID]langsmith.Clientschemas.TracerSessionResultc                    t          | t          j                  st          |           r|                    |           S |                    |           S )N
project_id)r   )r]   rT   rU   rM  read_project)rb   r9   s     rJ   r_   r_     sU     '49%% 7'):): 7""g"666G444rL   List[schemas.Run]c                   |rdnd}t          | t          j                  r|                    | j        |          }nXt          | t
          j                  st          |           r|                    | |          }n|                    | |          }|st          |          S t          j
        t                    }g }i }|D ]I}|j        !||j                                     |           n|                    |           |||j        <   J|                                D ]"\  }	}
t          |
d           ||	         _        #|S )z'Load nested traces for a given project.NT)rS  is_root)r   rW  c                    | j         S r   )dotted_order)rs    rJ   <lambda>z_load_traces.<locals>.<lambda>  s    q~ rL   )r   )r]   r   r^   	list_runsr   rT   rU   rM  r  r  r  parent_run_idr   r   sorted
child_runs)rb   r9   rM   rW  rc   treemapr   all_runsrX   r   r_  s              rJ   r`   r`     sf    "+ddtG'7011 G7:wGG	GTY	'	' G8G+<+< G7GDDWgFF Dzz9D9PQU9V9VGGH  (C%&--c2222NN3%mmoo W W
&,Z=U=U&V&V&V##NrL   schemas.TracerSession Dict[uuid.UUID, schemas.Example]c                ~    d |                      |j        |j                            d                    D             S )Nc                    i | ]
}|j         |S rS   r   r.  s     rJ   r   z&_load_examples_map.<locals>.<dictcomp>  s,        	
a  rL   r   )r   r   )r  r   r0   r   )r9   rb   s     rJ   ra   ra     sS     %%3"&&'899 & 
 
   rL   ITCallable[[IT], IT]c                 <    	 ddl m}  n# t          $ r d cY S w xY w| S )Nr   r   c                    | S r   rS   )xs    rJ   r[  z_load_tqdm.<locals>.<lambda>  s     rL   )	tqdm.autor   ImportErrorri  s    rJ   r   r     sJ    """""""   {Ks   	 ET_ExperimentManagerMixin)boundc                  \    e Zd Z	 	 	 ddd
Zedd            ZddZd Zd dZd!dZ	d"dZ
dS )#ro  Nr=   +Optional[Union[schemas.TracerSession, str]]r0   r1   r9   r:   r4   r3   c                  |pt          j                    | _        d | _        |t	                      | _        ntt          |t                    r9|dz   t          t          j	                    j
        d d                   z   | _        n&t          t          |j                  | _        || _        |pi }|                    d          s*dt          j                                        d          i|}|pi | _        || _        d S )Nr   r   revision_id)r[   r\   r9   _experiment_get_random_name_experiment_namer]   r   rT   r   r   r   r   r   ls_envget_langchain_env_var_metadata	_metadata_description)r   r=   r0   r9   r4   s        rJ   r   z _ExperimentManagerMixin.__init__  s     6 4 6 6<@$4$6$6D!!
C(( 	*$.$4s4:<<;KBQB;O7P7P$PD!!$(jo$>$>D!)D>r||M** 	vDFFJJ!    	H "R'rL   rB   r   c                <    | j         | j         S t          d          )Nz=Experiment name not provided, and experiment not yet started.)rw  rH   r   s    rJ   r   z'_ExperimentManagerMixin.experiment_name  s)     ,((K
 
 	
rL   rb  c                <    | j         t          d          | j         S )NExperiment not started yet.)ru  rH   r   s    rJ   _get_experimentz'_ExperimentManagerMixin._get_experiment&  s#    #:;;;rL   c                    | j         pi }t          j                    }|ri |d|i}| j        ri | j        j        |}|S )Ngit)rz  rx  get_git_inforu  r0   )r   project_metadatagit_infos      rJ   _get_experiment_metadataz0_ExperimentManagerMixin._get_experiment_metadata+  sw    >/R&(( 	 " x     	 "+ "   rL   r   	uuid.UUIDr  c           	     J   | j         }d}t          |          D ]x}	 | j                            | j         | j        ||          c S # t
          j        $ r: | dt          t          j	                    j
        d d                    | _         Y uw xY wt          d| d          )N
   )r4   r   r0   r      z+Could not find a unique experiment name in z= attempts. Please try again with a different experiment name.)rw  r  r9   create_projectr{  r  LangSmithConflictErrorr   rT   r   r   rH   )r   r   r0   starting_namenum_attempts_s         rJ   _create_experimentz*_ExperimentManagerMixin._create_experiment:  s     -|$$ 		W 		WAW{11) $ 1)3%	 2      2 W W W+8(V(V3tz||?OPRQRPR?S;T;T(V(V%%%WB, B B B
 
 	
s   'AABBfirst_examplerh   c                    | j         0|                                 }|                     |j        |          }n| j         }|S r   )ru  r  r  r   )r   r  r  rb   s       rJ   _get_projectz$_ExperimentManagerMixin._get_projectO  sL    ##<<>>--(*: GG &GrL   rb   Optional[schemas.TracerSession]r   c                   |ru|j         rn|j                             d          d         }|j        }|                    d          d         }| d| d|j         }t	          d| j         d| d           d S t	          d	| j                   d S )
Nr)  r   r*  r+  r,  z-View the evaluation results for experiment: 'z' at:
r/  z%Starting evaluation of experiment: %s)r0  r1  r   r   r2  r   )r   rb   r  r3  r   r4  r5  s          rJ   _print_experiment_startz/_ExperimentManagerMixin._print_experiment_startY  s      	w{ 	!+++C003K&1J"((88;H 1 1z 1 1$+J1 1  .@T . .'. . .     79M    rL   )NNN)r=   rr  r0   r1   r9   r:   r4   r3   r   )rB   rb  )r   r  r0   r  rB   rb  )r  rh   rB   rb  )rb   r  r  rh   rB   r   )rl   rm   rn   r   r   r   r  r  r  r  r  rS   rL   rJ   ro  ro     s        
 $(-1%)( ( ( ( (: 
 
 
 X
       
     
 
 
 
*        rL   c                      e Zd ZdZ	 	 	 	 	 	 	 	 dCdD fdZedEd            ZedFd            ZedGd            ZedHd!            Z	dId"Z
	 dJdKd'Zdd(dLd+ZdMd.ZdNd0ZdOd2Z	 dJdPd4ZdQd:Z	 dJdRd;ZdSd=ZdTd>ZdUd@ZdVdBZ xZS )Wrr   aR  Manage the execution of experiments.

    Supports lazily running predictions and evaluations in parallel to facilitate
    result streaming and early debugging.

    Args:
        data (DATA_T): The data used for the experiment. Can be a dataset name or ID OR
            a generator of examples.
        num_repetitions (int): The number of times to run over the data.
        runs (Optional[Iterable[schemas.Run]]): The runs associated with the experiment
            predictions.
        experiment (Optional[schemas.TracerSession]): The tracer session
            associated with the experiment.
        experiment_prefix (Optional[str]): The prefix for the experiment name.
        metadata (Optional[dict]): Additional metadata for the experiment.
        client (Optional[langsmith.Client]): The Langsmith client used for
             the experiment.
        evaluation_results (Optional[Iterable[EvaluationResults]]): The evaluation
            sresults for the experiment.
        summary_results (Optional[Iterable[EvaluationResults]]): The aggregate results
            for the experiment.
    Nr)   Tr=   rr  r0   r1   r9   r:   rc   Optional[Iterable[schemas.Run]]rj   %Optional[Iterable[EvaluationResults]]summary_resultsr4   r3   r7   r8   r?   r<   r*   r+   c                   t                                          ||||           || _        d | _        || _        || _        || _        |	| _        |
| _        d S )N)r=   r0   r9   r4   )	superr   _datar!  _runs_evaluation_resultsr   _num_repetitions_upload_results)r   r*   r=   r0   r9   rc   rj   r  r4   r7   r?   	__class__s              rJ   r   z_ExperimentManager.__init__  so     	!#	 	 	
 	
 	
 
>B
#5  / /-rL   rB   Iterable[schemas.Example]c                .   | j         lt          | j        | j                  | _         | j        dk    rAt
          j                            t          j        | j         | j                            | _         t          j        | j                   \  | _         }|S )N)r9   r)   )	r!  _resolve_datar  r9   r  	itertoolschainfrom_iterabletee)r   examples_iters     rJ   r  z_ExperimentManager.examples  s}    >!*4:dkJJJDN$q((!*!>!>M$.$2GHH" " )2dn(E(E%rL   r   c                   | j         t          | j         dd           s5t          t          | j                            }t          |j                  S t          t          t          j	        | j                   j
                  S )Nr   )ru  getattrnextiterr  r   r   r   r   r  r   )r   ri   s     rJ   r   z_ExperimentManager.dataset_id  sw    #74d,
 ,
# 4..//Gw)***,d.>??T
 
 	
rL   Iterable[EvaluationResults]c                @    | j         d | j        D             S | j         S )Nc              3     K   | ]}d g iV  	dS )r   NrS   )rW   r  s     rJ   	<genexpr>z8_ExperimentManager.evaluation_results.<locals>.<genexpr>  s&      ;;YO;;;;;;rL   )r  r  r   s    rJ   rj   z%_ExperimentManager.evaluation_results  s*    #+;;T];;;;''rL   Iterable[schemas.Run]c                t    | j         t          d          t          j        | j                   \  | _         }|S )Nz;Runs not provided in this experiment. Please predict first.)r  rH   r  r  )r   	runs_iters     rJ   rc   z_ExperimentManager.runs  s?    :P   !*dj 9 9
IrL   c           	     J   t          t          j        | j        d                    }| j        r|                     |          nd }|                     ||           | j        | j        d<   | 	                    | j        || j        | j
        | j        | j        | j                  S )Nr)   r7   )r=   r0   r9   rc   rj   r?   )r  r  islicer  r  r  r  r  rz  r  r9   r  r  )r   r  rb   s      rJ   r   z_ExperimentManager.start  s    Y-dmQ??@@6:6JT$##M222PT$$Wm<<<,0,A()~~M^;#7/  
 
 	
rL   r5   r6   r@   rE  c                  t                      }|                    | j        ||          }t          j        |d          \  }}t          d |D             | j        | j        | j        d |D             | j	                  S )z3Lazily apply the target function to the experiment.r<  r   c              3  &   K   | ]}|d          V  dS ri   NrS   rW   preds     rJ   r  z6_ExperimentManager.with_predictions.<locals>.<genexpr>  s&      ,,T)_,,,,,,rL   c              3  &   K   | ]}|d          V  dS rX   NrS   r  s     rJ   r  z6_ExperimentManager.with_predictions.<locals>.<genexpr>  s&      --$$u+------rL   )r=   r0   r9   rc   r?   )
r   rX   _predictr  r  rr   ru  rz  r9   r  )r   r@   r5   context_experiment_resultsr1r2s          rJ   rD  z#_ExperimentManager.with_predictions  s     ..%kkM6? * 
 
 2A66B!,,,,,'^;--"---/
 
 
 	
rL   r<  r,   *Sequence[Union[EVALUATOR_T, RunEvaluator]]c          
     D   t          |          }t                      }|                    | j        ||          }t	          j        |d          \  }}}t          d |D             | j        | j        | j	        d |D             d |D             | j
        | j                  S )z7Lazily apply the provided evaluators to the experiment.r<     c              3  &   K   | ]}|d          V  dS r  rS   rW   r   s     rJ   r  z5_ExperimentManager.with_evaluators.<locals>.<genexpr>  s'      006VI000000rL   c              3  &   K   | ]}|d          V  dS r  rS   r  s     rJ   r  z5_ExperimentManager.with_evaluators.<locals>.<genexpr>  s&      11F&-111111rL   c              3  &   K   | ]}|d          V  dS )rj   NrS   r  s     rJ   r  z5_ExperimentManager.with_evaluators.<locals>.<genexpr>  s(      NN'; <NNNNNNrL   r=   r0   r9   rc   rj   r  r?   )_resolve_evaluatorsr   rX   _scorer  r  rr   ru  rz  r9   r   r  )r   r,   r5   r  experiment_resultsr  r  r3s           rJ   rF  z"_ExperimentManager.with_evaluators  s     )44
..$[[K_ ) 
 

 ]#5q99
B!00R000'^;11b111NN2NNN 1/	
 	
 	
 		
rL   r.   Sequence[SUMMARY_EVALUATOR_T]c           
         t          |          }t                      }|                    | j        |          }t	          | j        | j        | j        | j        | j	        | j
        || j                  S )z?Lazily apply the provided summary evaluators to the experiment.r  )_wrap_summary_evaluatorsr   rX   _apply_summary_evaluatorsrr   r  ru  rz  r9   rc   r  r  )r   r.   wrapped_evaluatorsr  aggregate_feedback_gens        rJ   rG  z*_ExperimentManager.with_summary_evaluators  s{    
 66HII..!(*,>"
 "
 "M'^;#72/	
 	
 	
 		
rL   Iterable[ExperimentResultRow]c              #     K   t          | j        | j        | j                  D ]\  }}}t	          |||          V  dS )z?Return the traces, evaluation results, and associated examples.rX   ri   rj   N)ziprc   r  rj   rf   )r   rX   ri   rj   s       rJ   r   z_ExperimentManager.get_results"  sp      03It}d&=1
 1
 	 	,C, &#5      	 	rL   Dict[str, List[dict]]c                >    | j         dg iS dd | j         D             iS )zCIf summary_evaluators were applied, consume and return the results.Nr   c                (    g | ]}|d          D ]}|S r   rS   )rW   r   ress      rJ   rZ   z9_ExperimentManager.get_summary_scores.<locals>.<listcomp>3  sD       "9-      rL   )r   r   s    rJ   r   z%_ExperimentManager.get_summary_scores-  sC     (r?"   #4  
 	
rL   &Generator[_ForwardResults, None, None]c          	   #     K   t          |          |dk    r5 j        D ],}t          | j         j         j         j                  V  -not          j        |          5  fd j        D             }t          j
        |          D ]}|                                V  	 ddd           n# 1 swxY w Y                                     dS )z(Run the target function on the examples.r   c                x    g | ]6}                     t          |j        j        j        j                  7S rS   )r   _forwardr   rz  r9   r  )rW   ri   r   fnr   s     rJ   rZ   z/_ExperimentManager._predict.<locals>.<listcomp>N  sX          OO ,,   rL   N)_ensure_traceabler  r  r   rz  r9   r  r  r	  r
  as_completedr   _end)r   r@   r5   ri   r  r  r   r  s   `     @@rJ   r  z_ExperimentManager._predict<  s[      v&&a=  (NK(      3ODD *      $(=   !og66 * *F --//))))** * * * * * * * * * * * * * *  			s   $AB44B8;B8Sequence[RunEvaluator]current_resultsrf   r   cf.ThreadPoolExecutorc                |   t          j                    }i |d         pi | j        |d         j        |d         j        d}t          j        di i |d|| j        sdnd| j        d5  |d         |d         }|d	         }|D ]}	 |                    |
          }	|d                             | j        	                    |	                     | j        r| j        
                    |	|           s# t          $ r	 t          |          }
t          fd|
D                       }|d                             | j        	                    |                     | j        r| j        
                    ||           n4# t          $ r'}t                              d|            Y d }~nd }~ww xY wt                              dt#          |           drj        nd dt#                     d           Y d d ww xY wt%          ||          cd d d            S # 1 swxY w Y   d S )Nr0   ri   rX   )r=   rV   reference_run_idr,   localT)r   r0   enabledr9   rj   rX   ri   r   )rX   	_executorc           	     ^    g | ])}t          |j        t                    d di          *S )errorT)r   r   r   extra)r    r   repr)rW   r   r  rX   s     rJ   rZ   z6_ExperimentManager._run_evaluators.<locals>.<listcomp>  sR     % % % %( !1(+25&,0GG+2D/	!" !" !"% % %rL   r  zError parsing feedback keys: zError running evaluator z on run  : exc_infor  rS   )rF   get_tracing_contextr   r   r   r  r9   evaluate_runextend_select_eval_results_log_evaluation_feedback	Exception_extract_feedback_keysr!   loggerdebugr  r  rf   )r   r,   r  r   current_contextr0   ri   eval_resultsr   evaluator_responsefeedback_keyserror_responsee2r  rX   s                @@rJ   _run_evaluatorsz"_ExperimentManager._run_evaluators_  s    022
z*0b
 #2(7	(B(E$3E$:$= 
  
 
! ,$*.*>H77D+  
 
 >	 >	 "%(C%i0G*+?@L' - -	,)2)?)? ' *@ * *&
 !+22889KLL   + <<.C8 =    !   (>y(I(I):% % % % % ,9% % %
* 
* 
* %Y/66 K<<^LL    /  K@@ .C8 A    %   %IR%I%IJJJ LLC4	?? C C*- 52C C9=aC C!% !        5> '#/  u>	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	 >	sc   +H1
A.C98H19HBF	H	
F:	F5	0H5F:	:AHH1HH11H58H5c           
   #    K   t          j        |          5 }|dk    rEt                      }|                                 D ]!}|                    | j        |||          V  "nt                      }|                                 D ]}|                    |                    | j        |||                     	 t          j
        |d          D ]-}|                                V  |                    |           .x# t          j        t          f$ r Y w xY wt          j
        |          D ]}|                                }|V  ddd           dS # 1 swxY w Y   dS )zRun the evaluators on the prediction stream.

        Expects runs to be available in the manager.
        (e.g. from a previous prediction step)
        r   r   gMbP?)r   N)r  r	  r   r   rX   r  r   addr   r
  r  r   removeTimeoutError)	r   r,   r5   r   r  r  r  r  r   s	            rJ   r  z_ExperimentManager._score  s      /'
 
 
 !	!!##&..'+'7'7'9'9  O!++,"' 	      %%'+'7'7'9'9  OKK  0&+$	    ')ogu&M&M&M 3 3F"(--//111#NN622223 O\:    og66 ! !F#]]__F LLLLC!	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	!s7   B E9AC>=E>DED2EEE(Generator[EvaluationResults, None, None]c              #    K   g g }}t          | j        | j                  D ]/\  }}|                    |           |                    |           0g }t	          j                    5 }| j        r|                                 j        nd }t          j
                    }	i |	d         pi | j        |d}
t          j        di i |	d|
| j        | j        sdndd5  |D ]}	  |||          }| j                            ||j                  }|                    |           | j        rP|D ]M}|                    dh	          }|                    d
d           } |j        | j        j        fi |d ||d N# t*          $ r9}t,                              dt1          |           d| d           Y d }~d }~ww xY w	 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d|iV  d S )Nr0   )r=   experiment_idr,   r  T)r   r0   r9   r  )fn_nametarget_run_id)excludeevaluator_info)r   rS  source_infoz Error running summary evaluator r  r  r   rS   )r  rc   r  r   r  r	  r  r  r   rF   r  r   r   r9   r  rl   r  r  popr   r   r  r  r  r  )r   r.   rc   r  rX   ri   aggregate_feedbackr   rS  r   r0   r   summary_eval_resultflattened_resultsr   feedbackr  r  s                     rJ   r  z,_ExperimentManager._apply_summary_evaluators  sU      Rh	4=99 	% 	%LCKKOOG$$$$/11 +	X6:6JT--//22PTJ 466O":.4" #'"6%/ H #  %$0 ("k.2.BLww    ! ! "4  I.7ih.G.G+,0K,L,L/$-$6 -M - -) +112CDDD/ 
"*; 	" 	"+1;;?P;+Q+Q19>NPT1U1U /$(K$?!" !"&.!" ,0/90>!" !" !" !" !" !" %   UtIUURSUU%) %        )! ! ! ! ! ! ! ! ! ! ! ! ! ! !+	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	X ,------sb   &A4G$G BE:9G:
F=	/F8	3G8F=	=GG$G	G$G	G$$G(+G(c                    t          | j                  }d |D             }|rt          |          nd }|r|                                nd S )Nc                *    g | ]}|j         	|j         S rS   )modified_at)rW   exs     rJ   rZ   z;_ExperimentManager._get_dataset_version.<locals>.<listcomp>  s!    KKK"BNKr~KKKrL   )r  r  max	isoformat)r   r  r  max_modified_ats       rJ   _get_dataset_versionz'_ExperimentManager._get_dataset_version  sY    &&KKKKK /:C#k***t.=G((***4GrL   Optional[list[str]]c                   t          | j                  }t                      }|D ]}|j        ru|j                            d          r[t          |j        d         t                     r;|j        d         D ],}t          |t                    r|                    |           -~|                    d           t          |          S )Ndataset_splitbase)r  r  r   r0   r   r]   r   r  )r   r  splitsri   r1  s        rJ   _get_dataset_splitsz&_ExperimentManager._get_dataset_splits  s    && 
	# 
	#G 	#$((99	# w/@$GG	#
 %-o> * *E!%-- *

5)))* 

6""""F||rL   r   c                   | j         sd S | j        }|t          d          |                                 }|                                 |d<   |                                 |d<   | j                            |j        |j	        p-t          j
                            t          j        j                  i |j        |           d S )Nr~  r   dataset_splits)end_timer0   )r  ru  rH   r  r  r%  r9   update_projectr   r(  datetimenowtimezoneutcr0   )r   r=   r  s      rJ   r  z_ExperimentManager._end*  s    # 	F%
:;;;88::.2.G.G.I.I*+-1-E-E-G-G)*""M( < $$X%6%:;;%"	 	# 	
 	
 	
 	
 	
rL   )NNNNNNr)   T)r=   rr  r0   r1   r9   r:   rc   r  rj   r  r  r  r4   r3   r7   r8   r?   r<   r*   r+   )rB   r  r   )rB   r  )rB   r  )rB   rr   r   )r5   r6   r@   rE  rB   rr   )r,   r  r5   r6   rB   rr   )r.   r  rB   rr   )rB   r  )rB   r  )r5   r6   r@   rE  rB   r  )r,   r  r  rf   r   r  rB   rf   )r,   r  r5   r6   rB   r  )r.   r  rB   r  )rB   r3   )rB   r   r   )rl   rm   rn   r   r   r   r  r   rj   rc   r   rD  rF  rG  r   r   r  r  r  r  r  r%  r  __classcell__)r  s   @rJ   rr   rr   p  s%        8 $(-104DHAE%) #. . . . . . .8    X 
 
 
 X
 ( ( ( X(
    X
 
 
 
& *.	
 
 
 
 
> *.
 
 
 
 
 
>
 
 
 
*	 	 	 	
 
 
 
  EI! ! ! ! !FM M M Md *.+! +! +! +! +!Z4. 4. 4. 4.lH H H H   "
 
 
 
 
 
 
 
rL   rr   8Sequence[Union[EVALUATOR_T, RunEvaluator, AEVALUATOR_T]]r  c                (   g }| D ]}t          |t                    r|                    |           -t          |t                    r(|                    |                                           j|                    t          |                     |S r   )r]   r"   r   r&   as_run_evaluatorr%   )r,   r   r   s      rJ   r  r  ?  s     G 5 5	i.. 	5NN9%%%%	#;<< 	5NN955778888NN=334444NrL   r  List[SUMMARY_EVALUATOR_T]c                X    dd}g }| D ] }|                      ||                     !|S )Nr   r   rB   c                     t           dd          t                      t          j                   d
 fd	            }|S )Nrl   BatchEvaluatorrc   Sequence[schemas.Run]r  Sequence[schemas.Example]rB   *Union[EvaluationResult, EvaluationResults]c                     t          j                  d fd            } |dt                      d	d
t                     d	          S )Nr   runs_r   	examples_rB   r8  c                P     t                    t                              S r   )r  )r:  r;  r   r  rc   s     rJ   _wrapper_super_innerz]_wrap_summary_evaluators.<locals>._wrap.<locals>._wrapper_inner.<locals>._wrapper_super_innerX  s#     !ydT(^^<<<rL   zRuns[] (Length=)zExamples[] (Length=)r:  r   r;  r   rB   r8  )rF   	traceabler   )rc   r  r=  	eval_namer   s   `` rJ   _wrapper_innerz?_wrap_summary_evaluators.<locals>._wrap.<locals>._wrapper_innerT  s     \y)))= = = = = = = *)=
 ('.#d))...0Vc(mm0V0V0V  rL   )rc   r6  r  r7  rB   r8  )r  r#   	functoolswraps)r   rA  r@  s   ` @rJ   _wrapz'_wrap_summary_evaluators.<locals>._wrapP  sc    Iz3CDD	0;;			#	#	 	 	 	 	 	 
$	#	 rL   )r   r   rB   r   )r   )r,   rD  r   r   s       rJ   r  r  M  sP       ( G ) )	uuY''((((NrL   c                  $    e Zd ZU ded<   ded<   dS )_ForwardResultsrg   rX   rh   ri   Nrk   rS   rL   rJ   rF  rF  j  s*         rL   rF  r  rh.SupportsLangsmithExtrari   rh   r   r  c           	       
 d 
d
fd}t          j        |sdnd          5  |j        r|j                                        n|j                                        }t          j        |j        ||i |d	|i|
          }	  | |j        |           n7# t          $ r*}	t          
                    d|	 dd           Y d }	~	nd }	~	ww xY wt          t          t          j        
          |          cd d d            S # 1 swxY w Y   d S )NrZ  
rt.RunTreerB   r   c                    | d S r   rS   )rZ  rX   s    rJ   _get_runz_forward.<locals>._get_runy  s    rL   r  T)r  example_version)rV   on_endr   r0   r9   )langsmith_extrazError running target function: r)   )r  
stacklevelr  )rZ  rI  rB   r   )rF   r   r  r  
created_atLangSmithExtrar   inputsr  r  r  rF  r   r   r=  )r  ri   r   r0   r9   r?   rK  rL  rN  r  rX   s             @rJ   r  r  o  s    &*C      
	>$KGGt	L	L	L L L "0G))+++#--// 	
 +!((EE"3_EE
 
 
	Bw~????? 	 	 	LL5!55QR         	 4S#9#97KKK'L L L L L L L L L L L L L L L L L Ls<   ADBD
C	 C?DC		+DDDr  c               &   t          | t                    r|                    |           S t          | t          j                  r|                    |           S t          | t
          j                  r|                    | j                  S | S )z*Return the examples for the given dataset.)dataset_name)r   )r]   r   r  rT   rU   r   Datasetr   )r*   r9   s     rJ   r  r    s     $ 8###666	D$)	$	$ 8##t#444	D'/	*	* 8##tw#777KrL   =TARGET_T | rh.SupportsLangsmithExtra[[dict], dict] | Runnable'rh.SupportsLangsmithExtra[[dict], dict]c                    t          |           st          d          t          j        |           r| }nGt	          |           r| j        }  t          j        d          t          t          |                     }|S )z(Ensure the target function is traceable.zTarget must be a callable function or a langchain/langgraph object. For example:

def predict(inputs: dict) -> dict:
    # do work, like chain.invoke(inputs)
    return {...}

evaluate(
    predict,
    ...
)Targetr   )	r9  rH   rF   is_traceable_functionr8  invoker?  r   r   )r@   r  s     rJ   r  r    s      


 

 
	
 
'' A6<!&)) 	#]F(R\x(((h)?)?@@IrL   rc   r  STuple[Optional[Union[schemas.TracerSession, str]], Optional[Iterable[schemas.Run]]]c                ~   | ]t          | t          j                  r| }nt          | |          }|j        st          d          |j        st          d          ||fS |[t          j        |          \  }}t          |          }|
                    |j                  }|j        st          d          ||fS dS )Nz,Experiment name must be defined if provided.zOExperiment must have an associated reference_dataset_id, but none was provided.rR  z,Experiment name not found for provided runs.)NN)r]   r   r^   r_   r   rH   r   r  r  r  rT  
session_id)r=   rc   r9   rH  r:  	first_runs         rJ   r>  r>    s     j'"788 	?$KK*:v>>K 	MKLLL/ 	)   D  mD))tKK	))Y5I)JJ 	MKLLLD  :rL   c                 "    ddl m}   |             S )Nr   random_name)%langsmith.evaluation._name_generationrb  ra  s    rJ   rv  rv    s     AAAAAA;==rL   r   r"   c                <   t          | t                    rJt          | dd           rt          | j                  S t          | dd           rt          | j                  S t          | d          r,t          t          | d          dd           r| j        j        gS g S )Nfuncafuncr   evaluation_name)	r]   r   r  %_extract_code_evaluator_feedback_keysre  rf  hasattrr   rg  )r   s    rJ   r  r    s    )011 J9fd++ 	J8HHHY.. 	J8IIIy+&& 979k224EtLL 	9'788IrL   re  r   	list[str]c                   t          j        |           }d }d fd}t          j        |          }	 t	          j        |          }|j        d         }t          |t          j        t          j	        f          sg S i }g }t	          j
        |          D ]7}t          |t          j                  rt          |j        t          j                  rig }	|j        j        D ] }
|	                     |
                     !t          |j        d         t          j                  r|	||j        d         j        <   t          |t          j                  rw|j        p ||j                  } |j                  } ||j        |          }|                    |           |                    |           |                    |           9|r|n|j        gS # t*          $ r g cY S w xY w)Nc                Z   t          | t          j                  rg }d }t          | j        | j                  D ]\  }}t          |t          j        t          j        f          r|t          |t          j                  r|j        n|j	        }|dk    rNt          |t          j        t          j        f          r(t          |t          j                  r|j        n|j	        }|r|gn|S t          | t          j
                  rt          | j        t          j                  r| j        j        dk    rz| j        D ]r}|j        dk    ret          |j	        t          j        t          j        f          r:t          |j	        t          j                  r|j	        j        n|j	        j	        gc S sg S )Nr   r  )r]   astr
   r  keysvaluesStrConstantsr%  Callre  Namer   keywordsarg)nodern  	key_valuer   r%  key_strkeywords          rJ   extract_dict_keysz@_extract_code_evaluator_feedback_keys.<locals>.extract_dict_keys  s   dCH%% 	DI!$)T[99  
UcCGS\#:;; '1#sw'?'?NceeSYG%''Jusw>U,V,V''1%'A'AREGGu{ " #,5I;;5tSX&&	49ch//	 	&&= 
 
;%''JMCGS\#:- -'  *'-AA5GMOO!(!4	    	rL   c                   t          | t          j                  rt          | j        t          j                  r| j        j        dk    rz| j        D ]r}|j        dk    ret          |j        t          j	        t          j
        f          r:t          |j        t          j	                  r|j        j        n|j        j        gc S sg S )Nr    r   )r]   rm  rs  re  rt  r   ru  rv  r%  rp  rq  rr  )rw  rz  s     rJ   extract_evaluation_result_keyzL_extract_code_evaluator_feedback_keys.<locals>.extract_evaluation_result_key  s    tSX&&	49ch//	 	 222= 
 
;%''JMCGS\#:- -'  *'-AA5GMOO!(!4	    	rL   c           	     "   t          | t          j                  rt          | j        t          j                  r| j        j        dk    r| j        D ]}|j        dk    rt          |j        t          j                  r"|	                    |j        j        g           c S t          |j        t          j
                  r3g }|j        j        D ] }|                     	|                     !|c S nt          | t          j                  rt          | j        | j                  D ]\  }}t          |t          j        t          j        f          r|j        dk    rt          |t          j
                  rg }|j        D ]}t          |t          j                  rt          |j        |j                  D ]v\  }}t          |t          j        t          j        f          rK|j        dk    r@t          |t          j        t          j        f          r|                    |j                   wt          |t          j                  rt          |j        t          j                  r|j        j        dv r|j        D ]}|j        dk    rut          |j        t          j        t          j        f          rJ|                    t          |j        t          j                  r|j        j        n|j        j                   |c S g S )Nr!   r   r   )r    r  )r]   rm  rs  re  rt  r   ru  rv  r%  r   r   eltsr  r
   r  rn  ro  rp  rq  rr  r   )
rw  	variablesrz  rn  eltr   r%  elt_key	elt_valuer}  s
            rJ   extract_evaluation_results_keyszN_extract_code_evaluator_feedback_keys.<locals>.extract_evaluation_results_keys"  s   tSX&&-	$49ch//-	$ 	 333= $ $;)++!'-:: $(}}W]-=rBBBBB#GM38<< $!#*=#5 L LC KK(E(Ec(J(JKKKK#$ ch'' 	$!$)T[99 $ $
UcCGS\#:;; $@R@R!%22 $!#(: * *C)#sx88 *:=ch
:S:S !E !E$6GY(27SWcl<S(T(T%E,3I,>,>+5,57N,* ,* )E -1KK	,D,D,D!E !+3 9 9*$.sx$B$B* %(HK3O$O$O/2| !* !*G'.{e';';
(/7NA& A&'; )-/9'-/Q/Q-EGMOO181D)* )* )*  $	rL   r   )inspect	getsourcetextwrapdedentrm  parsebodyr]   FunctionDefAsyncFunctionDefwalkAssignr%  r   r  r  targetsrt  r   Returnr   SyntaxError)re  python_coder{  r  treefunction_defr  rn  rw  	list_keysr  	dict_keyseval_result_keyeval_results_keysr}  s                 @rJ   rh  rh    s$   #D))K  <  &/ / / / /b /+..K y%%y|,#:N(OPP 	I	H\** 	/ 	/D$
++ /dj#(33 B "I#z M M!(()F)Fs)K)KLLLL!$,q/38<< B8A	$,q/"45D#*-- 	/$*2H--dj99	"?"?
"K"K$C$CJ	% %! I&&&O,,,-... 4tt,"3!44   			s   AG ?EG G+*G+r   list[ExperimentResultRow]r   r   c                    	 dd l }n"# t          $ r}t          d          |d }~ww xY w |j        t          | ||                    S )Nr   zThe 'pandas' library is required to use the 'to_pandas' function. Please install it using 'pip install pandas' or 'conda install pandas' before calling this method.r   )r   rm  r   _flatten_experiment_results)r   r   r   pdr  s        rJ   r   r   x  sv    
   A
 
 		 2<3G5cRRRSSSs    
&!&c                *    d | ||         D             S )Nc                (   g | ]}i d  |d         j                                         D             d |d         j        pi                                 D             d|d         j        i|d         j        )d |d         j                                        D             ni d |d         d	         D             |d         j        r-|d         j        |d         j        z
                                  nd|d         j        |d         j        d
S )c                     i | ]\  }}d | |S )zinputs.rS   rW   kvs      rJ   r   z:_flatten_experiment_results.<locals>.<listcomp>.<dictcomp>  s$    HHHDAq}}}aHHHrL   ri   c                     i | ]\  }}d | |S )zoutputs.rS   r  s      rJ   r   z:_flatten_experiment_results.<locals>.<listcomp>.<dictcomp>  s$    NNNTQ~!~~qNNNrL   rX   r  Nc                     i | ]\  }}d | |S )z
reference.rS   r  s      rJ   r   z:_flatten_experiment_results.<locals>.<listcomp>.<dictcomp>  s'    NNNA!a!!1NNNrL   c                J    i | ] }d |j          |j        |j        n|j        !S )r   )r   r   r%  )rW   rZ  s     rJ   r   z:_flatten_experiment_results.<locals>.<listcomp>.<dictcomp>  sC        $AE##0CQWW  rL   rj   r   )execution_timer  r   )	rR  r   outputsr  r(  
start_timetotal_secondsrV   r   )rW   rk  s     rJ   rZ   z/_flatten_experiment_results.<locals>.<listcomp>  s_      , +	
HHAiL,?,E,E,G,GHHH	
NNah.>.D"-K-K-M-MNNN	
 QuX^	
 	
 Y<'3 ON91E1K1K1M1MNNNN	
 /0;  	
 U8$5"QuX%88GGIIIE(7E(+'	
 	
 	
  rL   rS   )r   r   r   s      rJ   r  r    s.    
 , s#-   rL   )maxsizeOptional[type]c                 6    	 ddl m}  | S # t          $ r Y d S w xY w)Nr   r'   )langchain_core.runnablesr(   rm  r'   s    rJ   _import_langchain_runnabler    s@    555555   tts   
 
or   c                \    t          t                      x}ot          | |                    S r   )r<   r  r]   )r  r(   s     rJ   r8  r8    s+    7999Vz!X?V?VWWWrL   )NNNNNNr)   NTNT)r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r3   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r<   r@   rA   rB   rC   )NNNNNFT)r,   r-   r.   r/   r0   r1   r5   r6   r9   r:   rM   r<   r;   r<   r=   rN   rB   rC   )NNr   NNFF)r,   r   r2   r3   r4   r3   r5   r8   r9   r:   r0   r1   rM   r<   r   r<   r   r   rB   r   )r   r&  r   r'  rB   r   )r@   r6  rB   r<   )r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r3   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r<   r@   r6  rB   rC   )r%  r   rB   r<   )rb   rN  r9   rO  rB   rP  )F)rb   rN   r9   rO  rM   r<   rB   rU  )r9   rO  rb   rb  rB   rc  )rB   rg  )r,   r/  rB   r  )r,   r  rB   r2  )r  rG  ri   rh   r   r   r0   r  r9   rO  r?   r<   rB   rF  )r*   r+   r9   rO  rB   r  )r@   rV  rB   rW  )r=   r>   rc   r  r9   rO  rB   r\  r   )r   r"   )re  r   rB   rj  r   )r   r  r   r6   r   r6   )rB   r  )r  r   rB   r<   )ur   
__future__r   rm  r  concurrent.futuresr  r
  r*  rB  r  r  loggingr@  rw   r   r  rz   rT   contextvarsr   typingr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   	langsmithr   rx  r   rF   r   r[   r   r   r  #langsmith._internal._beta_decoratorr   langsmith.evaluation.evaluatorr   r   r   r   r    r!   r"   r#   r$   r%   !langsmith.evaluation.integrationsr&   r   r  r  r(   r   	getLoggerrl   r  r  rE  r   rU   ExamplerU  r+   r=  EVALUATOR_TAEVALUATOR_TrK   rd   rf   rC   COMPARATIVE_EVALUATOR_Tr  r   r   r9  rI   rM  r_   r`   ra   rf  r   rn  ro  rr   r  r  rF  r  r  r  r>  rv  r  rh  r   r  	lru_cacher  r8  rS   rL   rJ   <module>r     s     " " " " " " 



                               $ $ $ $ $ $                                   & ( ' ' ' ' '     # # # # # # ' ' ' ' ' ' % % % % % %       ' ' ' ' ' ' : : : : : :                        G F F F F F 111111III		8	$	$TFDL!	sDIx8'/I	J 	hw/0 112	4 S%/1AABBCE 	hw/0%(*;;<=	?  37BF#'+!%%))-IMM M M M Mf 37BF#%))-[ [ [ [ [|* * * * *) * * *U  U  U  U  U  U  U  U v #gkHW_56	($./%2D89:	<  (,!%)-#!J7 J7 J7 J7 J7Z
       F
 
 
 
&> > > > 37BF#'+!%%))-IM9 9 9 9 9x   5 5 5 5     <	 	 	 	 WT]]    WT2333m m m m m m m m`L
 L
 L
 L
 L
0 L
 L
 L
^      :    i   
!L !L !L !LH
 
 
 
   2   B      G G G GX T T T T T&     > Q    X X X X X XrL   