
    Ngx                       d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZ ddlmZ 	 ddlmZmZmZmZ n# e$ r ddlmZmZmZmZ Y nw xY wddlZdd	lm Z  dd
l!m"Z"m#Z#m$Z$m%Z%  ej&        e'          Z( G d de          Z) G d ded          Z* G d de          Z+ G d ded          Z, G d d          Z-ee+e,e.f         Z/ G d de          Z0ee0e.f         Z1 G d de-          Z2d5dZ3dZ4d6d!Z5d7d%Z6 G d& d'          Z7d8d)Z8d9d,Z9d:d.Z:d;d2Z;eeeej%                 eej$                 gee+e,f         f         eeej%                 eej$                 gee+e,f         f         f         Z<d<d4Z=dS )=z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)
Any	AwaitableCallableDictListLiteralOptionalSequenceUnioncast)	TypedDict)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                  *    e Zd ZU dZded<   	 ded<   dS )Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__     Z/var/www/html/ai-engine/env/lib/python3.11/site-packages/langsmith/evaluation/evaluator.pyr   r   1   s0         ..&&&&CJJJ&&r'   r   c                  B    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   d	S )
FeedbackConfigziConfiguration to define a type of feedback.

    Applied on on the first creation of a feedback_key.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[List[Union[Category, dict]]]
categoriesNr    r&   r'   r(   r*   r*   :   sW          
 ;:::$$$$;$$$$A555555r'   r*   F)totalc                  
   e Zd ZU dZded<   	 dZded<   	 dZded<   	 dZd	ed
<   	 dZded<   	  e	e
          Zded<   	 dZded<   	 dZded<   	 dZded<   	 dZded<   	  G d d          Z edd          d             ZdS )EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[Dict]
correction)default_factoryr   evaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                      e Zd ZdZdZdS )EvaluationResult.ConfigzPydantic model configuration.FN)r!   r"   r#   r$   allow_extrar&   r'   r(   Configr>   d   s        ++r'   r@   T)prec                    d|vs|d         9t          |t          t          f          rt                              d|            |S )z$Check that the value is not numeric.r3   NzJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluess      r(   check_value_non_numericz(EvaluationResult.check_value_non_numerici   s]    
 &  F7O$;!c5\** !! !  
 r'   )r!   r"   r#   r$   r%   r3   r   r4   r5   r   dictr7   r8   r:   r;   r<   r@   r   rK   r&   r'   r(   r1   r1   I   s@        HHH@E0E8!G!!!!2!%J%%%%: 5666N66665=AOAAAA;59M9999659M9999 !E    )       
 YwD!!!  "!  r'   r1   c                      e Zd ZU dZded<   dS )EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zList[EvaluationResult]resultsNr    r&   r'   r(   rN   rN   x   s*           $###!!r'   rN   c                  :    e Zd ZdZe	 ddd	            Z	 ddd
ZdS )RunEvaluatorzEvaluator interface class.Nrunr   exampleOptional[Example]return*Union[EvaluationResult, EvaluationResults]c                    dS )zEvaluate an example.Nr&   selfrR   rS   s      r(   evaluate_runzRunEvaluator.evaluate_run   s      r'   c                n   K   t          j                                        d| j        ||           d{V S )z#Evaluate an example asynchronously.N)asyncioget_running_looprun_in_executorrZ   rX   s      r(   aevaluate_runzRunEvaluator.aevaluate_run   sS       -//??$#S'
 
 
 
 
 
 
 
 	
r'   NrR   r   rS   rT   rU   rV   )r!   r"   r#   r$   r   rZ   r_   r&   r'   r(   rQ   rQ      s^        $$59# # # # ^# 6:
 
 
 
 
 
 
r'   rQ   c                  J    e Zd ZU dZded<   	 ded<   	 dZded<   	 dZd	ed
<   dS )ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r2   z'Dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr9   r:   z6Optional[Union[str, Dict[Union[uuid.UUID, str], str]]]r4   )r!   r"   r#   r$   r%   r:   r4   r&   r'   r(   rc   rc      sb           HHH@3333459M99996FJGJJJJ: :r'   rc   c                       e Zd ZdZ	 d"d#dZ	 d$d%dZd&dZd'dZed(d            Z		 d"d)dZ
d"d* fdZ	 d"d)dZd+d!Z xZS ),DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    NfuncXCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]afuncIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]c                   t          |          }|rt          |          } t          |          |            ddlm} |7|                    |t
                    | _        t          |dd          | _        t          j
        |          rJ|t          d          |                    |t
                    | _        t          |dd          | _        dS |                    t          t          t          t          t                    gt"          f         |          t
                    | _        t          |dd          | _        dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        r   run_helpersNprocess_inputsr!   rf   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.)_normalize_evaluator_funcr   	langsmithrm   ensure_traceable_serialize_inputsri   getattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r   r   _RUNNABLE_OUTPUTrg   rY   rg   ri   rm   s       r(   __init__zDynamicRunEvaluator.__init__   sV   ( ).. 	5-e44EdD))))))$55&7 6  DJ !
4IJJDJ&t,, 	J 3  
 %55%6 6  DJ !z3HIIDJJJ#44XsHW$568HHI4PP0 5  DI !z3HIIDJJJr'   FresultUnion[EvaluationResult, dict]r:   	uuid.UUIDallow_no_keyboolrU   r1   c                \   t          t                    rj        s|_        S 	 st          d           dvr|r
| j        d<   t          fddD                       rt          d           t          di d|iS # t          $ r}t          d           |d }~ww xY w)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r2   c              3      K   | ]}|vV  	d S r`   r&   ).0kr}   s     r(   	<genexpr>z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>   s'      JJq1F?JJJJJJr'   )r3   r   r4   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r:   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r&   )rC   r1   r:   
ValueErrorrv   allr   )rY   r}   r:   r   es    `   r(   _coerce_evaluation_resultz-DynamicRunEvaluator._coerce_evaluation_result   s6    f.// 	' 5'4$M	  OFLO O   F""|" $
uJJJJ,IJJJJJ  XOUX X   $QQ&P&PQQQ 	 	 	=4:= =  	s   A B	 	
B+B&&B+rO   Union[dict, EvaluationResults]rV   c                     d|v r8|                                 } fd|d         D             |d<   t          di |S                      t          t          |          d          S )NrO   c                >    g | ]}                     |           S ))r:   )r   )r   rrY   r:   s     r(   
<listcomp>zBDynamicRunEvaluator._coerce_evaluation_results.<locals>.<listcomp>  s<        ..q.NN  r'   T)r:   r   r&   )copyrN   r   r   rL   )rY   rO   r:   cps   ` ` r(   _coerce_evaluation_resultsz.DynamicRunEvaluator._coerce_evaluation_results  s    
 B     +  ByM %**r***--w}4 . 
 
 	
r'   MUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]c                    t          |t                    r|j        s||_        |S t          |          }|                     ||          S r`   )rC   r1   r:   _format_evaluator_resultr   )rY   r}   r:   s      r(   _format_resultz"DynamicRunEvaluator._format_result  sR     f.// 	' 5'4$M)&11..v}EEEr'   c                "    t          | d          S zCheck if the evaluator function is asynchronous.

        Returns:
            bool: True if the evaluator function is asynchronous, False otherwise.
        ri   hasattrrY   s    r(   is_asynczDynamicRunEvaluator.is_async'       tW%%%r'   rR   r   rS   rT   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S t          j                    }d|j	        i}t          |dd          rt          |j                  |d<   |                     ||||d          }|                     ||          S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rg   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.r;   
session_idN
experimentrun_idmetadatalangsmith_extra)r   r\   get_event_loop
is_runningRuntimeErrorrun_until_completer_   uuiduuid4idru   r   r   rg   r   )rY   rR   rS   running_loopr:   r   r}   s          r(   rZ   z DynamicRunEvaluator.evaluate_run0  s     tV$$ 	Y"133L&&(( Y"R  
 $66t7I7I#w7W7WXXX
$3SV#<3d++ 	9%(%8%8H\"'4(KK  
 

 ""6=999r'   c                n  K   t          | d          s(t                                          ||           d{V S t          j                    }d|j        i}t          |dd          rt          |j                  |d<   | 	                    ||||d           d{V }| 
                    ||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        ri   Nr;   r   r   r   r   )r   superr_   r   r   r   ru   r   r   ri   r   )rY   rR   rS   r:   r   r}   	__class__s         r(   r_   z!DynamicRunEvaluator.aevaluate_runR  s       tW%% 	=..sG<<<<<<<<<
$3SV#<3d++ 	9%(%8%8H\"zz'4(KK " 
 
 
 
 
 
 
 

 ""6=999r'   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )rZ   rX   s      r(   __call__zDynamicRunEvaluator.__call__m  s       g...r'   r   c                    d| j          dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >rv   r   s    r(   __repr__zDynamicRunEvaluator.__repr__~  s    4tz4444r'   r`   )rg   rh   ri   rj   )F)r}   r~   r:   r   r   r   rU   r1   )rO   r   r:   r   rU   rV   )r}   r   r:   r   rU   rV   rU   r   ra   )rR   r   rS   rT   rU   r   )r!   r"   r#   r$   r|   r   r   r   propertyr   rZ   r_   r   r   __classcell__)r   s   @r(   rf   rf      s+        , 0J 0J 0J 0J 0Jl #	    <
 
 
 
"F F F F & & & X& 6: :  :  :  :  :D: : : : : : :8 6:/ / / / /"5 5 5 5 5 5 5 5r'   rf   rg   rh   c                     t          |           S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rf   rg   s    r(   run_evaluatorr     s     t$$$r'   i'  objr   c                ~    t          |           }t          |          t          k    r|d t          dz
           dz   }|S )N   z...))reprlen_MAXSIZE)r   ss     r(   _maxsize_reprr     s:    S		A
1vvn1n&Hr'   inputsrL   rU   c                    t          |                     d                    }t          |                     d                    }||dS )NrR   rS   )rR   rS   )r   get)r   run_truncatedexample_truncateds      r(   rt   rt     sD    !&**U"3"344M%fjj&;&;<< ->???r'   c                      e Zd ZdZ	 dddZedd
            Z	 dddZ	 dddZ	 dddZ	d dZ
ed!d            Zd"dZdS )#DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.Nrg   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]ri   UOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]c                   t          |          }|rt          |          } t          |          |            ddlm} |7|                    |t
                    | _        t          |dd          | _        t          j
        |          rJ|t          d          |                    |t
                    | _        t          |dd          | _        dS |                    t          t          t          t                   t           t"                   gt$          f         |          t
                    | _        t          |dd          | _        dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        r   rl   Nrn   r!   rf   rp   )$_normalize_comparison_evaluator_funcr   rr   rm   rs   rt   ri   ru   rv   rw   rx   ry   r   r   r   r   r   r   _COMPARISON_OUTPUTrg   r{   s       r(   r|   z&DynamicComparisonRunEvaluator.__init__  sj   ( 4D99 	@8??EdD))))))$55&7 6  DJ !
4IJJDJ&t,, 	J 3  
 %55%6 6  DJ !z3HIIDJJJ#44!#(9:*,    1 5 	 	DI !z3HIIDJJJr'   rU   r   c                "    t          | d          S r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.is_async  r   r'   runsSequence[Run]rS   rT   rc   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S t          j                    }| 	                    |          }| 
                    ||||d          }|                     |||          S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        rg   r   r   tagsr   )r   r\   r   r   r   r   acompare_runsr   r   	_get_tagsrg   _format_results)rY   r   rS   r   r:   r   r}   s          r(   compare_runsz*DynamicComparisonRunEvaluator.compare_runs  s     tV$$ 
	"133L&&(( "R  
 $66&&tW55   
~~d##'4dCC  
 

 ##FM4@@@r'   c                  K   t          | d          s|                     ||          S t          j                    }|                     |          }|                     ||||d           d{V }|                     |||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        ri   r   r   N)r   r   r   r   r   ri   r   )rY   r   rS   r:   r   r}   s         r(   r   z+DynamicComparisonRunEvaluator.acompare_runs  s        tW%% 	4$$T7333
~~d##zz'4dCC " 
 
 
 
 
 
 
 

 ##FM4@@@r'   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rY   r   rS   s      r(   r   z&DynamicComparisonRunEvaluator.__call__  s       w///r'   r   c                    d| j          dS )r   z<DynamicComparisonRunEvaluator r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.__repr__/  s    >>>>>r'   	List[str]c                    g }| D ]g}|                     dt          |j                  z              t          |dd          r*|                     dt          |j                  z              h|S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   ru   r   )r   r   rR   s      r(   r   z'DynamicComparisonRunEvaluator._get_tags3  su      	A 	ACKKSV,---sL$// AMC,?,??@@@r'   r}   -Union[dict, list, ComparisonEvaluationResult]r:   r   c                   t          |t                    r|j        s||_        |S t          |t                    r$d t	          ||          D             | j        |d}n8t          |t                    rd|vr
| j        |d<   nd|}t          |          	 t          di d|i|S # t          $ r}t          d|           |d }~ww xY w)Nc                $    i | ]\  }}|j         |S r&   )r   )r   rR   r3   s      r(   
<dictcomp>zADynamicComparisonRunEvaluator._format_results.<locals>.<dictcomp>J  s     MMMZS%365MMMr'   )rd   r2   r:   r2   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r:   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r&   )	rC   rc   r:   listziprv   rL   r   r   )rY   r}   r:   r   msgr   s         r(   r   z-DynamicComparisonRunEvaluator._format_results>  s:    f899 	"' 5'4$M%% 	"MM3tV;L;LMMMz!. FF
 %% 	"F"" $
u/%+/ /  S//!		-  "M<V<    	 	 	!! !  		s   B( (
C
2CC
r`   )rg   r   ri   r   r   )r   r   rS   rT   rU   rc   r   )r   r   rU   r   )r}   r   r:   r   r   r   rU   rc   )r!   r"   r#   r$   r|   r   r   r   r   r   r   staticmethodr   r   r&   r'   r(   r   r     s       >> 6J 6J 6J 6J 6Jp & & & X& AEA A A A A@ AEA A A A A: AE0 0 0 0 0"? ? ? ?    \" " " " " "r'   r   r   c                     t          |           S )z.Create a comaprison evaluator from a function.)r   r   s    r(   comparison_evaluatorr   c  s     )...r'   r   |Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]c                J    dt          j                   }d |j                                        D             r.t	          fdD                       s(t                    dk    rd d}t          |          t	          fdD                       rdd	gk    r S t          j                   r5d fd}t           d          rt           d          n|j
        |_
        |S d fd}t           d          rt           d          n|j
        |_
        |S )NrR   rS   r   outputsreference_outputsc                B    g | ]\  }}|j         |j        |j        fv |S r&   kindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr   pnameps      r(   r   z-_normalize_evaluator_func.<locals>.<listcomp>u  >       E16a-q/@AAA 	AAAr'   c              3      K   | ]}|v V  	d S r`   r&   r   r   supported_argss     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>{  (      EEE'EEEEEEr'      kInvalid evaluator function. Must have at least one positional argument. Supported positional arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3      K   | ]}|v V  	d S r`   r&   r   s     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>  9        $)     r'   rR   rS   r   rT   rU   rz   c                   K   | ||r|j         ni | j        pi |r	|j        pi ni dfdD             } |  d {V S )Nr   c              3  (   K   | ]}|         V  d S r`   r&   r   argarg_maps     r(   r   z>_normalize_evaluator_func.<locals>.awrapper.<locals>.<genexpr>  '      @@@@@@@@r'   r   r   rR   rS   argsr	  rg   positional_argss      @r(   awrapperz+_normalize_evaluator_func.<locals>.awrapper  s       &07?gnnR"{0bBI)Q)>Br  A@@@@@@!T4[(((((((r'   r!   r   c                r    | ||r|j         ni | j        pi |r	|j        pi ni dfdD             } | S )Nr   c              3  (   K   | ]}|         V  d S r`   r&   r  s     r(   r   z=_normalize_evaluator_func.<locals>.wrapper.<locals>.<genexpr>  r
  r'   r  r  s      @r(   wrapperz*_normalize_evaluator_func.<locals>.wrapper  sh    &07?gnnR"{0bBI)Q)>Br  A@@@@@@tT{"r'   )rR   r   rS   rT   rU   rz   )rR   r   rS   r   rU   rz   rw   	signature
parametersitemsr   r   r   rx   r   ru   r!   rg   sigr   r  r  r  r   s   `    @@r(   rq   rq   m  s    RN

D
!
!C ,,..  O
  :EEEE_EEEEE:  A%%G<JG G G 	 oo    -<     /	UI.	.	. &t,, (	) ) ) ) ) ) ) 4,,'j)))& 
 O	# 	# 	# 	# 	# 	# 	# 4,,&j)))% 
 Nr'   Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]c                J    dt          j                   }d |j                                        D             r.t	          fdD                       s(t                    dk    rd d}t          |          t	          fdD                       rdd	gk    r S t          j                   r5d fd}t           d          rt           d          n|j
        |_
        |S d fd}t           d          rt           d          n|j
        |_
        |S )Nr   rS   r   r   r   c                B    g | ]\  }}|j         |j        |j        fv |S r&   r   r   s      r(   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r   r'   c              3      K   | ]}|v V  	d S r`   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r   r'   r   r  r  c              3      K   | ]}|v V  	d S r`   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r  r'   r   rS   r   rT   rU   r   c                   K   | ||r|j         ni d | D             |r	|j        pi ni dfdD             } |  d {V S )Nc                     g | ]}|j         pi S r&   r   r   rR   s     r(   r   zJ_normalize_comparison_evaluator_func.<locals>.awrapper.<locals>.<listcomp>      BBBc 1rBBBr'   r  c              3  (   K   | ]}|         V  d S r`   r&   r  s     r(   r   zI_normalize_comparison_evaluator_func.<locals>.awrapper.<locals>.<genexpr>  r
  r'   r  r   rS   r  r	  rg   r  s      @r(   r  z6_normalize_comparison_evaluator_func.<locals>.awrapper  s       !&07?gnnRBBTBBBBI)Q)>Br  A@@@@@@!T4[(((((((r'   r!   r   c                x    | ||r|j         ni d | D             |r	|j        pi ni dfdD             } | S )Nc                     g | ]}|j         pi S r&   r!  r"  s     r(   r   zI_normalize_comparison_evaluator_func.<locals>.wrapper.<locals>.<listcomp>  r#  r'   r  c              3  (   K   | ]}|         V  d S r`   r&   r  s     r(   r   zH_normalize_comparison_evaluator_func.<locals>.wrapper.<locals>.<genexpr>  r
  r'   r  r%  s      @r(   r  z5_normalize_comparison_evaluator_func.<locals>.wrapper  so     &07?gnnRBBTBBBBI)Q)>Br  A@@@@@@tT{"r'   )r   r   rS   rT   rU   r   )r   r   rS   r   rU   r   r  r  s   `    @@r(   r   r     s    SN

D
!
!C ,,..  O
  :EEEE_EEEEE:  A%%G<JG G G 	 oo     -<     -	VY/	/	/&t,, (	) ) ) ) ) ) ) 4,,'j)))& 
 O	# 	# 	# 	# 	# 	# 	# 4,,&j)))% 
 Nr'   r}   ;Union[EvaluationResults, dict, str, int, bool, float, list]Union[EvaluationResults, dict]c                   t          | t          t          t          f          rd| i} n| st	          d|            t          | t
                    r1t          d | D                       st	          d|  d          d| i} nBt          | t                    rd| i} n(t          | t                    rnt	          d|            | S )	Nr3   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  @   K   | ]}t          |t                    V  d S r`   )rC   rL   )r   xs     r(   r   z+_format_evaluator_result.<locals>.<genexpr>  s,      771:a&&777777r'   z8Expected a list of dicts or EvaluationResults. Received .rO   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rC   r   rE   rD   r   r   r   r   rL   )r}   s    r(   r   r     s    &4,-- 
6" 
D;AD D
 
 	
 
FD	!	! 
7777777 	T6TTT   V$	FC	 	  
6"	FD	!	! 
/&,/ /
 
 	
 Mr'   SUMMARY_EVALUATOR_Tc                    dt          j                   }d |j                                        D             r.t	          fdD                       s3t                    dk    r d d}r	|d dz  }t          |          t	          fdD                       rd	d
gk    r S d fd}t           d          rt           d          n|j	        |_	        |S )Nr   examplesr   r   r   c                B    g | ]\  }}|j         |j        |j        fv |S r&   r   r   s      r(   r   z0_normalize_summary_evaluator.<locals>.<listcomp>,  r   r'   c              3      K   | ]}|v V  	d S r`   r&   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>2  r   r'   r   r  r.  z Received positional arguments c              3      K   | ]}|v V  	d S r`   r&   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>>  r  r'   r   r2  Sequence[schemas.Run]Sequence[schemas.Example]rU   rV   c                    | |d |D             d | D             d |D             dfdD             } | }t          |t                    r|S t          |          S )Nc                    g | ]	}|j         
S r&   )r   r   rS   s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>J  s    BBBg7>BBBr'   c                     g | ]}|j         pi S r&   r!  r"  s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>K  s    >>>#CK-2>>>r'   c                     g | ]}|j         pi S r&   r!  r:  s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>L  s    %T%T%Tgo&;%T%T%Tr'   r1  c              3  (   K   | ]}|         V  d S r`   r&   r  s     r(   r   z@_normalize_summary_evaluator.<locals>.wrapper.<locals>.<genexpr>N  s'      <<SGCL<<<<<<r'   )rC   r1   r   )r   r2  r  r}   r	  rg   r  s       @r(   r  z-_normalize_summary_evaluator.<locals>.wrapperD  s     $BBBBB>>>>>%T%T8%T%T%T G =<<<O<<<DT4[F&"233 +F333r'   r!   )r   r6  r2  r7  rU   rV   )
rw   r  r  r  r   r   r   r   ru   r!   )rg   r  r   r  r  r   s   `   @@r(   _normalize_summary_evaluatorr>  )  s|   SN

D
!
!C ,,..  O
  &EEEE_EEEEE&  A%%N<JN N N 	  	HG_GGGGCoo     -<     	VZ0	0	0	4 	4 	4 	4 	4 	4 	4" *1z)B)BXGD*%%%HX 	 r'   )rg   rh   )r   r   )r   rL   rU   rL   )rg   r   rU   r   )rg   r   rU   r   )rg   r   rU   r  )r}   r)  rU   r*  )rg   r   rU   r/  )>r$   
__future__r   r\   rw   r   abcr   typingr   r   r   r   r	   r
   r   r   r   r   typing_extensionsr   rr   r   pydantic.v1r   r   r   r   ImportErrorpydanticlogging	functoolsr   langsmith.schemasr   r   r   r   	getLoggerr!   rF   r   r*   r1   rN   rQ   rL   rz   rc   r   rf   r   r   r   rt   r   r   rq   r   r   r/  r>  r&   r'   r(   <module>rJ     s   E E " " " " " "                                 ( ' ' ' ' '                                           B B B B B B B B B B B B		8	$	$' ' ' ' 'y ' ' '6 6 6 6 6Ye 6 6 6 6, , , , ,y , , ,^" " " " "	 " " " "
 
 
 
 
 
 
 
$ )+<dBC : : : : : : : :$ 5t;< S5 S5 S5 S5 S5, S5 S5 S5l	% 	% 	% 	%    @ @ @ @A A A A A A A AH/ / / /G G G GTG G G GT   8 	'+	 9: 112	4 	gk	D12 112	4		 . . . . . .s    A A! A!