
    Ng                    h   d Z ddlmZ ddlZddlmZ ddlZddlZddl	Z	ddl
Z
ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlZddlmZ ddlmZmZ ddlmZ ddlm Z! dd	l"m#Z$ dd
l%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z= erddl>Z?e?j@        Z@neZ@ ejA        eB          ZCeeDgeeD         f         ZE	 	 	 	 	 	 	 	 	 	 	 dHdId)ZF	 	 	 	 	 	 	 dJdKd.ZG	 	 	 	 	 	 	 	 	 	 	 dHdLd1ZH G d2 d3e,          ZI G d4 d(          ZJdMd>ZKdNd@ZLdOdBZM edC          ZNdPdGZOdS )QzV2 Evaluation Interface.    )annotationsN)TYPE_CHECKINGAnyAsyncIterableAsyncIterator	AwaitableCallableDictIterableListOptionalSequenceTypeVarUnioncast)run_helpers)	run_treesschemas)r   )utils)_aiter)
_warn_once)AEVALUATOR_TDATA_TEVALUATOR_TExperimentResultRow_ExperimentManagerMixin_extract_feedback_keys_ForwardResults_is_langchain_runnable_load_examples_map_load_experiment
_load_tqdm_load_traces_resolve_data_resolve_evaluators_resolve_experiment
_to_pandas_wrap_summary_evaluators)SUMMARY_EVALUATOR_TEvaluationResultEvaluationResultsRunEvaluator   TdataHUnion[DATA_T, AsyncIterable[schemas.Example], Iterable[schemas.Example]]
evaluators4Optional[Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]]summary_evaluators'Optional[Sequence[SUMMARY_EVALUATOR_T]]metadataOptional[dict]experiment_prefixOptional[str]descriptionmax_concurrencyOptional[int]num_repetitionsintclientOptional[langsmith.Client]blockingbool
experiment6Optional[Union[schemas.TracerSession, str, uuid.UUID]]upload_resultstarget%Union[ATARGET_T, AsyncIterable[dict]]returnAsyncExperimentResultsc                  K   |st          d           |r|rt          d| d|           t          | |||||||||	|
||           d{V S )a  Evaluate an async target system or function on a given dataset.

    Args:
        target (Union[AsyncCallable[[dict], dict], AsyncIterable[dict]]): The async target system or function to evaluate.
        data (Union[DATA_T, AsyncIterable[schemas.Example]]): The dataset to evaluate on. Can be a dataset name, a list of
            examples, an async generator of examples, or an async iterable of examples.
        evaluators (Optional[Sequence[EVALUATOR_T]]): A list of evaluators to run
            on each example. Defaults to None.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary
            evaluators to run on the entire dataset. Defaults to None.
        metadata (Optional[dict]): Metadata to attach to the experiment.
            Defaults to None.
        experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
            Defaults to None.
        description (Optional[str]): A description of the experiment.
        max_concurrency (Optional[int]): The maximum number of concurrent
            evaluations to run. Defaults to None.
        num_repetitions (int): The number of times to run the evaluation.
            Each item in the dataset will be run and evaluated this many times.
            Defaults to 1.
        client (Optional[langsmith.Client]): The LangSmith client to use.
            Defaults to None.
        blocking (bool): Whether to block until the evaluation is complete.
            Defaults to True.
        experiment (Optional[schemas.TracerSession]): An existing experiment to
            extend. If provided, experiment_prefix is ignored. For advanced
            usage only.

    Returns:
        AsyncIterator[ExperimentResultRow]: An async iterator over the experiment results.

    Environment:
        - LANGSMITH_TEST_CACHE: If set, API calls will be cached to disk to save time and
            cost during testing. Recommended to commit the cache files to your repository
            for faster CI/CD runs.
            Requires the 'langsmith[vcr]' package to be installed.

    Examples:
        >>> from typing import Sequence
        >>> from langsmith import Client, aevaluate
        >>> from langsmith.schemas import Example, Run
        >>> client = Client()
        >>> dataset = client.clone_public_dataset(
        ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
        ... )
        >>> dataset_name = "Evaluate Examples"

        Basic usage:

        >>> def accuracy(run: Run, example: Example):
        ...     # Row-level evaluator for accuracy.
        ...     pred = run.outputs["output"]
        ...     expected = example.outputs["answer"]
        ...     return {"score": expected.lower() == pred.lower()}

        >>> def precision(runs: Sequence[Run], examples: Sequence[Example]):
        ...     # Experiment-level evaluator for precision.
        ...     # TP / (TP + FP)
        ...     predictions = [run.outputs["output"].lower() for run in runs]
        ...     expected = [example.outputs["answer"].lower() for example in examples]
        ...     # yes and no are the only possible answers
        ...     tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
        ...     fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
        ...     return {"score": tp / (tp + fp)}

        >>> import asyncio
        >>> async def apredict(inputs: dict) -> dict:
        ...     # This can be any async function or just an API call to your app.
        ...     await asyncio.sleep(0.1)
        ...     return {"output": "Yes"}
        >>> results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=dataset_name,
        ...         evaluators=[accuracy],
        ...         summary_evaluators=[precision],
        ...         experiment_prefix="My Experiment",
        ...         description="Evaluate the accuracy of the model asynchronously.",
        ...         metadata={
        ...             "my-prompt-version": "abcd-1234",
        ...         },
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Evaluating over only a subset of the examples using an async generator:

        >>> async def example_generator():
        ...     examples = client.list_examples(dataset_name=dataset_name, limit=5)
        ...     for example in examples:
        ...         yield example
        >>> results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=example_generator(),
        ...         evaluators=[accuracy],
        ...         summary_evaluators=[precision],
        ...         experiment_prefix="My Subset Experiment",
        ...         description="Evaluate a subset of examples asynchronously.",
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Streaming each prediction to more easily + eagerly debug.

        >>> results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=dataset_name,
        ...         evaluators=[accuracy],
        ...         summary_evaluators=[precision],
        ...         experiment_prefix="My Streaming Experiment",
        ...         description="Streaming predictions for debugging.",
        ...         blocking=False,
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        >>> async def aenumerate(iterable):
        ...     async for elem in iterable:
        ...         print(elem)
        >>> asyncio.run(aenumerate(results))

        Running without concurrency:

        >>> results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=dataset_name,
        ...         evaluators=[accuracy],
        ...         summary_evaluators=[precision],
        ...         experiment_prefix="My Experiment Without Concurrency",
        ...         description="This was run without concurrency.",
        ...         max_concurrency=0,
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Using Async evaluators:

        >>> async def helpfulness(run: Run, example: Example):
        ...     # Row-level evaluator for helpfulness.
        ...     await asyncio.sleep(5)  # Replace with your LLM API call
        ...     return {"score": run.outputs["output"] == "Yes"}

        >>> results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=dataset_name,
        ...         evaluators=[helpfulness],
        ...         summary_evaluators=[precision],
        ...         experiment_prefix="My Helpful Experiment",
        ...         description="Applying async evaluators example.",
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    z&'upload_results' parameter is in beta.zeExpected at most one of 'experiment' or 'experiment_prefix', but both were provided. Got: experiment=z, experiment_prefix=)r.   r0   r2   r4   r6   r8   r9   r;   r=   r?   rA   rC   N)r   
ValueError
_aevaluate)rD   r.   r0   r2   r4   r6   r8   r9   r;   r=   r?   rA   rC   s                Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/langsmith/evaluation/_arunner.py	aevaluaterL   I   s      Z  =;<<< 
' 
S)S S?PS S
 
 	

 -+''%             Fload_nested,Union[str, uuid.UUID, schemas.TracerSession]"AsyncIterator[ExperimentResultRow]c                 K   |pt          j                    }t          | t          j                  r| n t          j        t          | |           d{V }t          j        t          | ||           d{V }	t          j        t          ||           d{V fd|	D             }
t          |	|
|||||||	  	         d{V S )aL  Evaluate existing experiment runs asynchronously.

    Args:
        experiment (Union[str, uuid.UUID]): The identifier of the experiment to evaluate.
        evaluators (Optional[Sequence[EVALUATOR_T]]): Optional sequence of evaluators to use for individual run evaluation.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): Optional sequence of evaluators
            to apply over the entire dataset.
        metadata (Optional[dict]): Optional metadata to include in the evaluation results.
        max_concurrency (Optional[int]): Optional maximum number of concurrent evaluations.
        client (Optional[langsmith.Client]): Optional Langsmith client to use for evaluation.
        load_nested: Whether to load all child runs for the experiment.
            Default is to only load the top-level root runs.
        blocking (bool): Whether to block until evaluation is complete.

    Returns:
        AsyncIterator[ExperimentResultRow]: An async iterator over the experiment results.

    Examples:
        Define your evaluators

        >>> from typing import Sequence
        >>> from langsmith.schemas import Example, Run
        >>> def accuracy(run: Run, example: Example):
        ...     # Row-level evaluator for accuracy.
        ...     pred = run.outputs["output"]
        ...     expected = example.outputs["answer"]
        ...     return {"score": expected.lower() == pred.lower()}
        >>> def precision(runs: Sequence[Run], examples: Sequence[Example]):
        ...     # Experiment-level evaluator for precision.
        ...     # TP / (TP + FP)
        ...     predictions = [run.outputs["output"].lower() for run in runs]
        ...     expected = [example.outputs["answer"].lower() for example in examples]
        ...     # yes and no are the only possible answers
        ...     tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
        ...     fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
        ...     return {"score": tp / (tp + fp)}

        Load the experiment and run the evaluation.

        >>> from langsmith import aevaluate, aevaluate_existing
        >>> dataset_name = "Evaluate Examples"
        >>> async def apredict(inputs: dict) -> dict:
        ...     # This can be any async function or just an API call to your app.
        ...     await asyncio.sleep(0.1)
        ...     return {"output": "Yes"}
        >>> # First run inference on the dataset
        ... results = asyncio.run(
        ...     aevaluate(
        ...         apredict,
        ...         data=dataset_name,
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...

        Then evaluate the results
        >>> experiment_name = "My Experiment:64e6e91"  # Or manually specify
        >>> results = asyncio.run(
        ...     aevaluate_existing(
        ...         experiment_name,
        ...         evaluators=[accuracy],
        ...         summary_evaluators=[precision],
        ...     )
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...


    N)rN   c                *    g | ]}|j                  S  )reference_example_id).0rundata_maps     rK   
<listcomp>z&aevaluate_existing.<locals>.<listcomp>g  s!    ???3HS-.???rM   )r.   r0   r2   r4   r9   r=   r?   rA   )r   get_cached_client
isinstancer   TracerSession
aitertoolsaio_to_threadr!   r#   r    rJ   )rA   r0   r2   r4   r9   r=   rN   r?   projectrunsr.   rW   s              @rK   aevaluate_existingr`     s5     \ 4y244F j'"788	T

,-=z6RRRRRRRR 
 )j&k        D  -.@&'RRRRRRRRH????$???D-'
 
 
 
 
 
 
 
 
 
rM   -Union[DATA_T, AsyncIterable[schemas.Example]]<Union[ATARGET_T, AsyncIterable[dict], Iterable[schemas.Run]]c          
       K   t          j        |           pDt          | d          o%t          j        |                                           pt          |           }|	pt          j                    }	|rd n$t          t          t          j                 |           }t          j        t          |||	           d {V \  }}t          ||	||p|||||                                           d {V }t#          j        d           }|5|                                 d {V }t)          j        |          | dz  }nd }t#          j        ||	j        g          5  |r0|                    t          t2          |           |           d {V }|r|                    ||           d {V }|r|                    |           d {V }t9          |          }|
r|                                 d {V  |cd d d            S # 1 swxY w Y   d S )N	__aiter__)r=   r4   rA   r8   r;   r_   rC   z.yaml)ignore_hostsr9   )asyncioiscoroutinefunctionhasattriscoroutinerd   r   rtrY   r   r   r   Runr\   r]   r&   _AsyncExperimentManagerastartls_utilsget_cache_dirget_dataset_idpathlibPathwith_optional_cacheapi_urlawith_predictions	ATARGET_Tawith_evaluatorsawith_summary_evaluatorsrG   wait)rD   r.   r0   r2   r4   r6   r8   r9   r;   r=   r?   rA   rC   is_async_targetr_   experiment_manager	cache_dirdsid
cache_pathresultss                        rK   rJ   rJ   u  s     " 	#F++ 	*FK((TW-@AQAQASAS-T-T	*!&)) 
 -r+--F"K44Xgk-BF(K(KD(6	       K ,3"3'%	 	 	 fhh	 	 	 	 	 	G &t,,I++--------\),,$~~~=


		%j?O	P	P	P   	#55Y'' 6        G  	#44O 5        G  	Q#<<=OPPPPPPPPG(11 	!,,..                        s   BHHHc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 dBdC fdZdDdZdEdZdFdZdGd!ZdHd"Z		 dIdJd'Z
dd(dKd+ZdLd.ZdMd0ZdNd2Z	 dIdOd4Z	 dIdPd6ZdQd;ZdRd<ZdSd=ZdTd?ZdUdAZ xZS )Vrm   aa  Manage the execution of experiments asynchronously.

    Supports lazily running predictions and evaluations in parallel to facilitate
    result streaming and early debugging.

    Args:
        data (DATA_T): The data used for the experiment. Can be a dataset name or ID OR
            a generator of examples.
        runs (Optional[Iterable[schemas.Run]]): The runs associated with the experiment
            predictions.
        experiment (Optional[schemas.TracerSession]): The tracer session
            associated with the experiment.
        experiment_prefix (Optional[str]): The prefix for the experiment name.
        description (Optional[str]): The description for the experiment.
        metadata (Optional[dict]): Additional metadata for the experiment.
        client (Optional[langsmith.Client]): The Langsmith client used for
             the experiment.
        evaluation_results (Optional[Iterable[EvaluationResults]]): The evaluation
            sresults for the experiment.
        summary_results (Optional[Iterable[EvaluationResults]]): The aggregate results
            for the experiment.
    Nr-   TrA   +Optional[Union[schemas.TracerSession, str]]r4   r5   r_   BOptional[Union[Iterable[schemas.Run], AsyncIterable[schemas.Run]]]r=   r>   evaluation_results*Optional[AsyncIterable[EvaluationResults]]summary_resultsr8   r7   r;   r<   rC   r@   r.   ra   c                   t                                          ||||           || _        d | _        |t	          j        |          nd | _        || _        || _        |	| _	        |
| _
        d S )N)rA   r4   r=   r8   )super__init___data	_examplesr\   ensure_async_iterator_runs_evaluation_results_summary_results_num_repetitions_upload_results)selfr.   rA   r4   r_   r=   r   r   r8   r;   rC   	__class__s              rK   r   z _AsyncExperimentManager.__init__  s     	!#	 	 	
 	
 	
 
CG6:6FJ,T222D 	
 $6  / /-rM   rF   AsyncIterator[schemas.Example]c                ^  K   | j         \t          | j        | j                  | _         | j        dk    r1t          t          j        | j         | j                            | _         t          j        t          j        | j                   dt          j
                              \  | _         }|S )Nr=   r-      lock)r   _aresolve_datar   r=   r   async_chain_from_iterabler\   ateer   rg   Lock)r   examples_iters     rK   aget_examplesz%_AsyncExperimentManager.aget_examples  s      >!+DJt{KKKDN$q((!:ODND4IJJ" " )3,T^<<aglnn)
 )
 )
% rM   strc                   K   | j         t          | j         dd           sWt          j        |                                  d {V            d {V }|t          d          t          |j                  S t          | j         j                  S )Nreference_dataset_idz!No examples found in the dataset.)	_experimentgetattrr\   py_anextr   rI   r   
dataset_idr   )r   examples     rK   rq   z&_AsyncExperimentManager.get_dataset_id  s      #74d,
 ,
# '/d6H6H6J6J0J0J0J0J0J0JKKKKKKKKG !DEEEw)***4#8999rM   AsyncIterator[schemas.Run]c                  K   | j         t          d          t          j        t          j        | j                   dt          j                              \  | _         }|2 3 d {V }|W V  6 d S )NzRuns not loaded yet.r   r   )r   rI   r\   r   r   rg   r   )r   r_   rV   s      rK   	aget_runsz!_AsyncExperimentManager.aget_runs   s      :3444%?,TZ88!',..
 
 

D  	 	 	 	 	 	 	#IIIII s   "A/ AsyncIterator[EvaluationResults]c                 K   | j         ,|                                  d {V 2 3 d {V }dg iW V  6 d S t          j        t          j        | j                   dt          j                              \  | _         }|2 3 d {V }|W V  6 d S )Nr   r   r   )r   r   r\   r   r   rg   r   )r   _r   results       rK   aget_evaluation_resultsz/_AsyncExperimentManager.aget_evaluation_results	  s      #+!%!3!3!5!5555555 & & & & & & &a "o%%%%% 655 <F?01IJJ\^^< < <8D$&8
 !3       f !3 2 2s
   3?Bc           	       K   	 t          j        |                                  d {V            d {V }n# t          $ r t	          d          w xY w|st	          d          | j        r|                     |          nd }|                     ||           | j        | j	        d<   | 
                    |                                  d {V || j	        | j        | j        | j        | j                  S )Nz\No examples found in the dataset. Please ensure the data provided to aevaluate is not empty.z[No examples found in the dataset.Please ensure the data provided to aevaluate is not empty.r;   )rA   r4   r=   r_   r   rC   )r\   r   r   StopAsyncIterationrI   r   _get_project_print_experiment_startr   	_metadatar   r=   r   r   )r   first_exampler^   s      rK   rn   z_AsyncExperimentManager.astart  sE     	","5D<N<N<P<P6P6P6P6P6P6P"Q"QQQQQQQMM! 	 	 	M  	
  	M   7;6JT$##M222PT$$Wm<<<,0,A()~~$$&&&&&&&&^;#7/  
 
 	
s	   27 Ar9   r:   rD   rw   c                 K   |                      ||          }t          j        |dt          j                              \  }}t          d |2             | j        | j        | j        d |2             | j	                  S )Nrf   r   r   c               4   K   | 3 d {V }|d         W V  6 d S Nr   rS   rU   preds     rK   	<genexpr>z<_AsyncExperimentManager.awith_predictions.<locals>.<genexpr>9  s;      2222222tT)_22222222   c               4   K   | 3 d {V }|d         W V  6 d S NrV   rS   r   s     rK   r   z<_AsyncExperimentManager.awith_predictions.<locals>.<genexpr>=  s;      3333333$u+33333333r   )rA   r4   r=   r_   rC   )
	_apredictr\   r   rg   r   rm   r   r   r=   r   )r   rD   r9   _experiment_resultsr1r2s         rK   rv   z)_AsyncExperimentManager.awith_predictions0  s       #nnV_nUU!4aglnnMMMB&22r222'^;33333/
 
 
 	
rM   rf   r0   *Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]c          
     F  K   t          |          }|                     ||          }t          j        |dt	          j                              \  }}}t          d |2             | j        | j        | j	        d |2             d |2             | j
        | j                  S )Nrf      r   c               4   K   | 3 d {V }|d         W V  6 d S r   rS   rU   r   s     rK   r   z;_AsyncExperimentManager.awith_evaluators.<locals>.<genexpr>K  s<      6666666VI66666666r   c               4   K   | 3 d {V }|d         W V  6 d S r   rS   r   s     rK   r   z;_AsyncExperimentManager.awith_evaluators.<locals>.<genexpr>O  s;      7777777&&-77777777r   c               4   K   | 3 d {V }|d         W V  6 d S )Nr   rS   r   s     rK   r   z;_AsyncExperimentManager.awith_evaluators.<locals>.<genexpr>P  s=      TTTTTTTv'; <TTTTTTTTr   rA   r4   r=   r_   r   r   rC   )r%   _ascorer\   r   rg   r   rm   r   r   r=   r   r   )r   r0   r9   experiment_resultsr   r   r3s          rK   rx   z(_AsyncExperimentManager.awith_evaluatorsA  s       )44
!\\*o\VV_%7PPP
B&662666'^;77B777TTQSTTT 1/	
 	
 	
 		
rM   r2   Sequence[SUMMARY_EVALUATOR_T]c           
       K   t          |          }|                     |          }t          |                                  d {V | j        | j        | j        |                                 | j        || j	                  S )Nr   )
r(   _aapply_summary_evaluatorsrm   r   r   r   r=   r   r   r   )r   r2   wrapped_evaluatorsaggregate_feedback_gens       rK   ry   z0_AsyncExperimentManager.awith_summary_evaluatorsU  s       66HII!%!@!@AS!T!T&$$&&&&&&&&'^;!!#72/	
 	
 	
 		
rM   rP   c                  K   t          j        |                                 |                                  d {V |                                           2 3 d {V \  }}}t          |||          W V  !6 d S )NrV   r   r   )r\   	async_zipr   r   r   r   )r   rV   r   r   s       rK   aget_resultsz$_AsyncExperimentManager.aget_resultsf  s      6@6JNND$6$6$8$8888888$:V:V:X:X7
 7
 	 	 	 	 	 	 	2#w 2 &#5       7
 7
 7
s   A6Dict[str, List[dict]]c                N   K   | j         dg iS dd | j         2              d {V iS )Nr   c                :   K   g | 3 d {V 	 }|d         D ]}|6 S )Nr   rS   )rU   r   ress      rK   rX   z?_AsyncExperimentManager.aget_summary_scores.<locals>.<listcomp>t  sk               !"9-       s   )r   r   s    rK   aget_summary_scoresz+_AsyncExperimentManager.aget_summary_scoresp  sc       (r?"  %)%:        
 	
rM   AsyncIterator[_ForwardResults]c                  K   t          |           fd}t          j        | |            d          2 3 d {V }|W V  6                                   d {V  d S )Nc                   K                                     d {V 2 3 d {V } t          | j        j        j                  W V  -6 d S N)r   	_aforwardexperiment_namer   r=   )r   fnr   s    rK   predict_allz6_AsyncExperimentManager._apredict.<locals>.predict_all  s      '+'9'9';';!;!;!;!;!;!;       g!5t~t{       "<!;!;s   AMbP?_eager_consumption_timeout)_ensure_async_traceabler\   aiter_with_concurrency_aend)r   rD   r9   r   r   r   s   `    @rK   r   z!_AsyncExperimentManager._apredict}  s       %V,,	 	 	 	 	 	 '=[[]]u
 
 
 	 	 	 	 	 	 	& LLLLL

 jjlls   ASequence[RunEvaluator]c                   K   t          j        d          5  fd}t          j        | |            d          2 3 d {V }|W V  6 	 d d d            d S # 1 swxY w Y   d S )N   )max_workersc                |   K                                    2 3 d {V }                     |           W V  #6 d S )N)executor)r   _arun_evaluators)current_resultsr0   r   r   s    rK   	score_allz2_AsyncExperimentManager._ascore.<locals>.score_all  s      -1->->-@-@       ///"Oh 0        .A-@-@s   ;r   r   )cfThreadPoolExecutorr\   r   )r   r0   r9   r   r   r   s   ``   @rK   r   z_AsyncExperimentManager._ascore  s     
 "q111 	X       !+ A! ! !       f ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   'AAAA"%A"r   r   r   cf.ThreadPoolExecutorc                  K   t          j                    }i |d         pi d| j        i}t          j        di i |d|| j        sdnd| j        d5  |d         |d         }|d	         }|D ]}	 |                    |
           d {V }	|d                             | j                            |	                     | j        r| j        	                    |	|           y# t          $ rL	 t          |          }
t          fd|
D                       }|d                             | j                            |                     | j        r| j        	                    ||           n4# t          $ r'}t                              d|            Y d }~nd }~ww xY wt                              dt!          |           dj         dt!                     d           t                              dt!          |           dj         dt!                     d           Y d d ww xY wt%          ||          cd d d            S # 1 swxY w Y   d S )Nr4   rA   r0   localTproject_namer4   enabledr=   rV   r   r   rV   r   r   )rV   	_executorc           	     ^    g | ])}t          |j        t                    d di          *S )errorT)keysource_run_idcommentextra)r*   idrepr)rU   r  erV   s     rK   rX   z<_AsyncExperimentManager._arun_evaluators.<locals>.<listcomp>  sR     % % % %( !1(+25&,0GG+2D/	!" !" !"% % %rM   )r   zError parsing feedback keys: zError running evaluator z on run : exc_infor   rS   )rhget_tracing_contextr   tracing_contextr   r=   aevaluate_runextend_select_eval_results_log_evaluation_feedback	Exceptionr   r+   loggerdebugr   r  r  r   )r   r0   r   r   current_contextr4   r   eval_results	evaluatorevaluator_responsefeedback_keyserror_responsee2r  rV   s                @@rK   r   z(_AsyncExperimentManager._arun_evaluators  s      022
z*0b
T12
  
 
! ,$*.*>H77D+  
 
 @	 @	 "%(C%i0G*+?@L' / /	./8/F/F ' 0G 0 0 * * * * * *& !+22889KLL   + <<.C8 =    ! " " "(>y(I(I):% % % % % ,9% % %
* 
* 
* %Y/66 K<<^LL    /  K@@ .C8 A    %   %IR%I%IJJJ LL44	?? 4 4 #4 4*.q''4 4!% !   
 LL44	?? 4 4 #4 4*.q''4 4!% !        ="F '#/  y@	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	sc   I!4A4C)(I!)I 5BE98H;9
F*	F%	 H;%F*	*BH;5I!;I  I!!I%(I%c               ,  K   g g }}t          j        |                                  d {V           }t          j        |                                 |          2 3 d {V \  }}|                    |           |                    |           56 g }| j        r|                                 j        nd }t          j
                    }	i |	d         pi | j        |d}
t          j        di i |	d|
| j        sdnd| j        d5  |D ]}	  |||          }| j                            ||j                  }|                    |           | j        rZ|D ]W}|                    dh	          }|                    d
d           }t          j        | j        j        fi |d ||d d {V  X# t*          $ r9}t,                              dt1          |           d| d           Y d }~d }~ww xY w	 d d d            n# 1 swxY w Y   d|iW V  d S )Nr4   )rA   experiment_idr0   r   Tr   )fn_nametarget_run_id)excludeevaluator_info)run_id
project_idsource_infoz Error running summary evaluator r  r	  r   rS   )r\   r   r   r   r   appendr   _get_experimentr  r  r  r   r  r=   r  __name__r  dictpopr]   create_feedbackr  r  r   r  )r   r2   r_   examplesasync_examplesrV   r   aggregate_feedbackr#  r  r4   r  summary_eval_resultflattened_resultsr   feedbackr!  r  s                     rK   r   z2_AsyncExperimentManager._aapply_summary_evaluators  so      Rh#9@R@R@T@T:T:T:T:T:T:TUU","6NNn#
 #
 	% 	% 	% 	% 	% 	% 	%,#w KKOOG$$$$	#

  262FPT))++..D
022
z*0b
 #2!+ 
  
 
! ,$*.*>H77D+  
 
  	  	 0  	*3)D(*C*C'(,(H(H+ ) 2 )I ) )% '--.?@@@+ 
&7 	 	F'-{{O;L{'M'MH-5\\:JD-Q-QN",": $ ;# #"*# (,+5,:# # # #         !   LLQ4	??QQaQQ!% !        ' 	  	  	  	  	  	  	  	  	  	  	  	  	  	  	B ,-------sC   BHB#F/.H/
G29/G-(H-G22HH	Hc                   K   g }|                                   d {V 2 3 d {V }|j        r|                    |j                   )6 |rt          |          nd }|r|                                nd S r   )r   modified_atr%  max	isoformat)r   r2  r   max_modified_ats       rK   _get_dataset_versionz,_AsyncExperimentManager._get_dataset_version'  s      #'#5#5#7#7777777 	8 	8 	8 	8 	8 	8 	8'" 8 ""7#6777	 8 /:C#k***t.=G((***4Gs   AOptional[list[str]]c                  K   t                      }|                                  d {V 2 3 d {V }|j        ru|j                            d          r[t	          |j        d         t
                    r;|j        d         D ],}t	          |t                    r|                    |           -|                    d           6 t          |          S )Ndataset_splitbase)setr   r4   getrZ   listr   add)r   splitsr   splits       rK   _get_dataset_splitsz+_AsyncExperimentManager._get_dataset_splits2  s     #'#5#5#7#7777777 
	# 
	# 
	# 
	# 
	# 
	# 
	#' 	#$((99	# w/@$GG	#
 %-o> * *E!%-- *

5)))* 

6"""" 8 F||s   CNonec                  K   | j         sd S | j        }|t          d          |                                 }|                                  d {V |d<   |                                  d {V |d<   | j                            |j        |j	        p-t          j
                            t          j        j                  i |j        |           d S )NzExperiment not started yet.dataset_versiondataset_splits)end_timer4   )r   r   rI   _get_experiment_metadatar6  rA  r=   update_projectr  rF  datetimenowtimezoneutcr4   )r   rA   project_metadatas      rK   r   z_AsyncExperimentManager._aendB  s     # 	F%
:;;;88::484M4M4O4O.O.O.O.O.O.O*+373K3K3M3M-M-M-M-M-M-M)*""M( < $$X%6%:;;%"	 	# 	
 	
 	
 	
 	
rM   )	NNNNNNNr-   T)rA   r   r4   r5   r_   r   r=   r>   r   r   r   r   r8   r7   r;   r<   rC   r@   r.   ra   )rF   r   rF   r   )rF   r   )rF   r   )rF   rm   r   )r9   r:   rD   rw   rF   rm   )r0   r   r9   r:   rF   rm   )r2   r   rF   rm   rF   rP   )rF   r   )r9   r:   rD   rw   rF   r   )r0   r   r9   r:   rF   rP   )r0   r   r   r   r   r   rF   r   )r2   r   rF   r   )rF   r7   )rF   r7  rF   rB  )r'  
__module____qualname____doc__r   r   rq   r   r   rn   rv   rx   ry   r   r   r   r   r   r   r6  rA  r   __classcell__)r   s   @rK   rm   rm     s        6 CG#'SW-1IMFJ%) #. . . . . . .<   : : : :      
 
 
 
< *.	
 
 
 
 
* *.	
 
 
 
 
 
(
 
 
 
"   	
 	
 	
 	
 FJ    , *.    &K K K KZ5. 5. 5. 5.n	H 	H 	H 	H    
 
 
 
 
 
 
 
rM   rm   c                  r    e Zd ZddZedd            ZddZdd
ZddZ	 dd dZ	ddZ
d!dZddZd"dZdS )#rG   experiment_managerrm   c                    || _         g | _        t          j                    | _        t          j        |                     | j                             | _        d| _        d S )Nr   )	_manager_resultsrg   r   _lockcreate_task_process_data_task_processed_count)r   rV  s     rK   r   zAsyncExperimentResults.__init__X  sO     +35\^^
(););DM)J)JKK
 !rM   rF   r   c                    | j         j        S r   )rX  r   r   s    rK   r   z&AsyncExperimentResults.experiment_nameb  s    },,rM   rP   c                    | S r   rS   r   s    rK   rd   z AsyncExperimentResults.__aiter__f  s    rM   r   c           	        K   d	 fd}	  j         4 d {V   j        t           j                  k     r6 j         j                 } xj        dz  c_        |cd d d           d {V  S  j                                        rt          	 d d d           d {V  n# 1 d {V swxY w Y   t          j        t          j	         |t           j                            d                      d {V  )
Nindexr<   rF   rB  c                p   K   j         | k     r't          j        d           d {V  j         | k     %d S d S )Ng?)r^  rg   sleep)rb  r   s    rK   _wait_until_indexz;AsyncExperimentResults.__anext__.<locals>._wait_until_indexj  sS      '%//mD))))))))) '%//////rM   Tr-   )timeout)rb  r<   rF   rB  )
rZ  r^  lenrY  r]  doner   rg   shieldwait_for)r   re  r   s   `  rK   	__anext__z AsyncExperimentResults.__anext__i  s     	* 	* 	* 	* 	* 	*	z - - - - - - - -(3t}+=+===!]4+@AF))Q.))!	- - - - - - - - - - - - - -
 Z__&& -,,,- - - - - - - - - - - - - - - - - - - - - - - - - - - . !2!23t}3E3E!F!FPTUUU        	s   AB, B
B),B)r}   rB  c                  K   t                      } ||                                          2 3 d {V }| j        4 d {V  | j                            |           d d d           d {V  n# 1 d {V swxY w Y   S6 |                                 d {V }| j        4 d {V  || _        d d d           d {V  d S # 1 d {V swxY w Y   d S r   )r"   r   rZ  rY  r%  r   r   )r   r}   tqdmitemsummary_scoress        rK   r\  z$AsyncExperimentResults._process_data{  s     ||$w335566 	+ 	+ 	+ 	+ 	+ 	+ 	+$z + + + + + + + +$$T***+ + + + + + + + + + + + + + + + + + + + + + + + + + + 7  '::<<<<<<<<: 	3 	3 	3 	3 	3 	3 	3 	3$2D!	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3s)   BA//
A9	<A9	)C
CCr   Nstartr:   end	DataFramec                0    t          | j        ||          S )N)rp  rq  )r'   rY  )r   rp  rq  s      rK   	to_pandasz AsyncExperimentResults.to_pandas  s     $-u#>>>>rM   c                    dd l }| j        rD|j                            d          r*|                     dd          }|                                S |                                 S )Nr   pandas   )importlib.utilrY  util	find_specrt  _repr_html___repr__)r   	importlibdfs      rK   r{  z"AsyncExperimentResults._repr_html_  s`    = 	#Y^55h?? 	#1%%B>>###==??"rM   r<   c                *    t          | j                  S r   )rg  rY  r   s    rK   __len__zAsyncExperimentResults.__len__  s    4=!!!rM   c                    d| j          dS )Nz<AsyncExperimentResults >)r   r   s    rK   r|  zAsyncExperimentResults.__repr__  s    A$*>AAAArM   c                $   K   | j          d {V  d S r   )r]  r   s    rK   rz   zAsyncExperimentResults.wait  s&      jrM   )rV  rm   rN  rO  )rF   r   )r}   rm   rF   rB  )r   N)rp  r:   rq  r:   rF   rr  )rF   r<   rP  )r'  rQ  rR  r   propertyr   rd   rk  r\  rt  r{  r  r|  rz   rS   rM   rK   rG   rG   W  s        " " " " - - - X-      $3 3 3 3 >B? ? ? ? ?
# # # #" " " "B B B B     rM   r   ,rh.SupportsLangsmithExtra[[dict], Awaitable]r   schemas.Exampler   r   r(  langsmith.Clientr   c                  K   d dfd}t          j        d          5  	  | |j        t          j        |j        ||i |d|j        r|j                                        n|j                                        i|	          
           d {V  n7# t          $ r*}t          
                    d| dd           Y d }~nd }~ww xY wt          t          t          j                  |          cd d d            S # 1 swxY w Y   d S )Nrrun_trees.RunTreerF   rB  c                    | d S r   rS   )r  rV   s    rK   _get_runz_aforward.<locals>._get_run  s    rM   T)r   example_version)rT   on_endr   r4   r=   )langsmith_extrazError running target function: r-   )r
  
stacklevelr   )r  r  rF   rB  )r  r  inputsLangSmithExtrar  r2  r4  
created_atr  r  r   r   r   r   rl   )r   r   r   r4   r=   r  r  rV   s          @rK   r   r     s      &*C      
	D	)	)	) 
 
	" " 1)0#!0")&2@G/99;;;!(!3!=!=!?!?  "! ! !          "  	 	 	LL5!55QR         	 W[#&&
 
 
/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s;   DA1BD
C	 C?DC		+DDDrw   c                6   t          j        |           s<t          |           s-t          |           rt	          d          t	          d          t          j        |           r| S t          |           r| j        }  t          j        d          |           S )NzTarget must be an async function. For sync functions, use evaluate. Example usage:

async def predict(inputs: dict) -> dict:
    # do work, like chain.invoke(inputs)
    return {...}
await aevaluate(predict, ...)zTarget must be a callable async function. Received a non-callable object. Example usage:

async def predict(inputs: dict) -> dict:
    # do work, like chain.invoke(inputs)
    return {...}
await aevaluate(predict, ...)AsyncTarget)name)	rg   rh   r   callablerI   r  is_traceable_functionainvoke	traceable)rD   s    rK   r   r     s     &v.. 7Mf7U7U F 	0   0   
'' 8!&)) 	$^F/r|///777rM   r   c                   t          | t                    rt          j        |           S t          j        t	          | |                    S )z*Return the examples for the given dataset.r   )rZ   r   r\   r   r$   )r.   r=   s     rK   r   r     sE     $&& 6/555+M$v,N,N,NOOOrM   TiterableIterable[AsyncIterable[T]]AsyncIterator[T]c               4   K   | D ]}|2 3 d{V }|W V  6 dS )zChain multiple async iterables.NrS   )r  sub_iterablern  s      rK   r   r     s]       !  & 	 	 	 	 	 	 	$JJJJJ ', s   )NNNNNNr-   NTNT)r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r7   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   r@   rD   rE   rF   rG   )NNNNNFT)r0   r1   r2   r3   r4   r5   r9   r:   r=   r>   rN   r@   r?   r@   rA   rO   rF   rP   )r.   ra   r0   r1   r2   r3   r4   r5   r6   r7   r8   r7   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   r@   rD   rb   rF   rG   )r   r  r   r  r   r   r4   r(  r=   r  rF   r   )rD   rw   rF   r  )r.   ra   r=   r  rF   r   )r  r  rF   r  )PrS  
__future__r   rg   concurrent.futuresfuturesr   rI  loggingrr   uuidtypingr   r   r   r   r   r	   r
   r   r   r   r   r   r   r   	langsmithr   r  r   r   rk   r   ro   langsmith._internalr   r\   #langsmith._internal._beta_decoratorr   langsmith.evaluation._runnerr   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   langsmith.evaluation.evaluatorr)   r*   r+   r,   rv  pdrr  	getLoggerr'  r  r(  rw   rL   r`   rJ   rm   rG   r   r   r   r  r   rS   rM   rK   <module>r     sO     " " " " " "                                          "     ' ' ' ' ' ' ( ( ( ( ( ( ( ( % % % % % % ' ' ' ' ' ' 4 4 4 4 4 4 : : : : : :                                     &             III		8	$	$dVYt_,-	 HLBF#'+!%%))-IMC C C C CR HLBF#%))-c c c c cT HLBF#'+!%%))-IM; ; ; ; ;|a
 a
 a
 a
 a
5 a
 a
 a
HB B B B B B B BJ'
 '
 '
 '
T8 8 8 8<P P P P GCLL     rM   