§
    †ìNgW.  ã                   óâ   — d Z ddlmZmZmZmZmZmZmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZ ddlmZmZ  G d„ de¦  «        Z G d	„ d
e¦  «        Zdeeef         defd„Z G d„ de¦  «        ZdS )zGContains the LLMEvaluator class for building LLM-as-a-judge evaluators.é    )ÚAnyÚCallableÚDictÚListÚOptionalÚTupleÚUnionÚcast)Ú	BaseModel)Ú	warn_beta)ÚEvaluationResultÚEvaluationResultsÚRunEvaluator)ÚExampleÚRunc                   óf   — e Zd ZU dZeed<   ee         ed<   eed<   dZeed<   dZ	e
e         ed<   dS )	ÚCategoricalScoreConfigz&Configuration for a categorical score.ÚkeyÚchoicesÚdescriptionFÚinclude_explanationNÚexplanation_description)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚstrÚ__annotations__r   r   Úboolr   r   © ó    ú^/var/www/html/ai-engine/env/lib/python3.11/site-packages/langsmith/evaluation/llm_evaluator.pyr   r      sc   € € € € € € Ø0Ð0à	€H€HHØ#ŒYÐÐÑØÐÐÑØ %Ð˜Ð%Ð%Ñ%Ø-1Ð˜X cœ]Ð1Ð1Ñ1Ð1Ð1r!   r   c                   ól   — e Zd ZU dZeed<   dZeed<   dZeed<   eed<   dZ	e
ed	<   d
Zee         ed<   d
S )ÚContinuousScoreConfigz%Configuration for a continuous score.r   r   Úminé   Úmaxr   Fr   Nr   )r   r   r   r   r   r   r%   Úfloatr'   r   r   r   r   r    r!   r"   r$   r$      sm   € € € € € € Ø/Ð/à	€H€HHØ€Cˆ€N€NNØ€Cˆ€N€NNØÐÐÑØ %Ð˜Ð%Ð%Ñ%Ø-1Ð˜X cœ]Ð1Ð1Ñ1Ð1Ð1r!   r$   Úscore_configÚreturnc                 ó  — i }t          | t          ¦  «        r+d| j        dd                     | j        ¦  «        › ddœ|d<   nJt          | t          ¦  «        r&d| j        | j        d| j        › d	| j        › d
dœ|d<   nt          d¦  «        ‚| j        rd| j	        €dn| j	        dœ|d<   | j
        | j        d|| j        rddgndgdœS )NÚstringz%The score for the evaluation, one of z, ú.)ÚtypeÚenumr   ÚscoreÚnumberz&The score for the evaluation, between z and z, inclusive.)r.   ÚminimumÚmaximumr   z9Invalid score type. Must be 'categorical' or 'continuous'zThe explanation for the score.)r.   r   ÚexplanationÚobject)Útitler   r.   Ú
propertiesÚrequired)Ú
isinstancer   r   Újoinr$   r%   r'   Ú
ValueErrorr   r   r   r   )r)   r7   s     r"   Ú_create_score_json_schemar<   !   sL  € ð "$€JÝ,Õ 6Ñ7Ô7ð VàØ Ô(ð2ØyŠy˜Ô-Ñ.Ô.ð2ð 2ð 2ð
ð 
ˆ
7ÑÐõ 
LÕ"7Ñ	8Ô	8ð 	VàØ#Ô'Ø#Ô'ðEØÔðEð EØ&2Ô&6ðEð Eð Eð	
ð 
ˆ
7ÑÐõ ÐTÑUÔUÐUàÔ'ð 
àð  Ô7Ð?ð 1Ð0à!Ô9ð%
ð %
ˆ
=Ñ!ð Ô!Ø#Ô/ØØ à(4Ô(HÐWˆWmÐ$Ð$ÈwÈiðð ð r!   c                   ó~  — e Zd ZdZddddœdeeeeeef                  f         deee	f         de
eee
e         gef                  d	ed
ef
d„Zeddœdedeeeeeef                  f         deee	f         de
eee
e         gef                  fd„¦   «         Zdeeeeeef                  f         deee	f         de
eee
e         gef                  defd„Ze	 ddede
e         deeef         fd„¦   «         Ze	 ddede
e         deeef         fd„¦   «         Zdede
e         defd„Zdedeeef         fd„ZdS )ÚLLMEvaluatorz/A class for building LLM-as-a-judge evaluators.Nzgpt-4oÚopenai)Úmap_variablesÚ
model_nameÚmodel_providerÚprompt_templater)   r@   rA   rB   c                ó    — 	 ddl m} n"# t          $ r}t          d¦  «        |‚d}~ww xY w |d||dœ|¤Ž}	|                      ||||	¦  «         dS )aæ  Initialize the LLMEvaluator.

        Args:
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a human / user message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt. Defaults to None. If None, it is assumed that the prompt
                only requires 'input', 'output', and 'expected'.
            model_name (Optional[str], optional): The model to use for the evaluation.
                Defaults to "gpt-4o".
            model_provider (Optional[str], optional): The model provider to use
                for the evaluation. Defaults to "openai".
        r   )Úinit_chat_modelzmLLMEvaluator requires langchain to be installed. Please install langchain by running `pip install langchain`.N)ÚmodelrB   r    )Úlangchain.chat_modelsrE   ÚImportErrorÚ_initialize)
ÚselfrC   r)   r@   rA   rB   ÚkwargsrE   ÚeÚ
chat_models
             r"   Ú__init__zLLMEvaluator.__init__O   s¤   € ð6	Ø=Ð=Ð=Ð=Ð=Ð=Ð=øÝð 	ð 	ð 	ÝðOñô ð ðøøøøð	øøøð %_ð 
Ø¨^ð
ð 
Ø?Eð
ð 
ˆ
ð 	×Ò˜¨,¸ÀzÑRÔRÐRÐRÐRs   ‚	 ‰
(“#£()r@   rF   c                ó`   — |                       | ¦  «        }|                     ||||¦  «         |S )a›  Create an LLMEvaluator instance from a BaseChatModel instance.

        Args:
            model (BaseChatModel): The chat model instance to use for the evaluation.
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a system message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example]], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt. Defaults to None. If None, it is assumed that the prompt
                only requires 'input', 'output', and 'expected'.

        Returns:
            LLMEvaluator: An instance of LLMEvaluator.
        )Ú__new__rI   )ÚclsrF   rC   r)   r@   Úinstances         r"   Ú
from_modelzLLMEvaluator.from_modelx   s4   € ð4 —;’;˜sÑ#Ô#ˆØ×Ò˜_¨l¸MÈ5ÑQÔQÐQØˆr!   rM   c                 ó\  — 	 ddl m} ddlm} n"# t          $ r}t	          d¦  «        |‚d}~ww xY wt          ||¦  «        rt          |d¦  «        st          d¦  «        ‚t          |t          ¦  «        r| 	                    d|fg¦  «        | _
        n| 	                    |¦  «        | _
        t          | j
        j        ¦  «        h d	£z
  r|st          d
¦  «        ‚|| _        || _        t          | j        ¦  «        | _        |                     | j        ¦  «        }| j
        |z  | _        dS )aÑ  Shared initialization code for __init__ and from_model.

        Args:
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt template.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The score configuration.
            map_variables (Optional[Callable[[Run, Example]], dict]]):
                Function to map variables.
            chat_model (BaseChatModel): The chat model instance.
        r   )ÚBaseChatModel)ÚChatPromptTemplatez|LLMEvaluator requires langchain-core to be installed. Please install langchain-core by running `pip install langchain-core`.NÚwith_structured_outputzRchat_model must be an instance of BaseLanguageModel and support structured output.Úhuman>   ÚinputÚoutputÚexpectedzrmap_inputs must be provided if the prompt template contains variables other than 'input', 'output', and 'expected')Ú*langchain_core.language_models.chat_modelsrU   Úlangchain_core.promptsrV   rH   r9   Úhasattrr;   r   Úfrom_messagesÚpromptÚsetÚinput_variablesr@   r)   r<   Úscore_schemarW   Úrunnable)rJ   rC   r)   r@   rM   rU   rV   rL   s           r"   rI   zLLMEvaluator._initialize–   s‚  € ð"	ØPÐPÐPÐPÐPÐPØAÐAÐAÐAÐAÐAÐAøÝð 	ð 	ð 	ÝðYñô ð ðøøøøð	øøøõ z =Ñ1Ô1ð	å˜
Ð$<Ñ=Ô=ð	õ ðCñô ð õ
 o¥sÑ+Ô+ð 	LØ,×:Ò:¸WÀoÐ<VÐ;WÑXÔXˆDŒKˆKà,×:Ò:¸?ÑKÔKˆDŒKåˆtŒ{Ô*Ñ+Ô+Ð.MÐ.MÐ.MÑMð 	Ø ð Ý ðMñô ð ð +ˆÔà(ˆÔÝ5°dÔ6GÑHÔHˆÔà×6Ò6°tÔ7HÑIÔIˆ
Øœ jÑ0ˆŒˆˆs   ‚ 
.™)©.ÚrunÚexampler*   c                 ó²   — |                       ||¦  «        }t          t          | j                             |¦  «        ¦  «        }|                      |¦  «        S )zEvaluate a run.)Ú_prepare_variablesr
   Údictrd   ÚinvokeÚ_parse_output©rJ   re   rf   Ú	variablesrZ   s        r"   Úevaluate_runzLLMEvaluator.evaluate_runÌ   sL   € ð
 ×+Ò+¨C°Ñ9Ô9ˆ	ÝD $¤-×"6Ò"6°yÑ"AÔ"AÑBÔBˆØ×!Ò! &Ñ)Ô)Ð)r!   c              ƒ   óÂ   K  — |                       ||¦  «        }t          t          | j                             |¦  «        ƒ d{V —†¦  «        }|                      |¦  «        S )zAsynchronously evaluate a run.N)rh   r
   ri   rd   Úainvokerk   rl   s        r"   Úaevaluate_runzLLMEvaluator.aevaluate_runÕ   sb   è è € ð
 ×+Ò+¨C°Ñ9Ô9ˆ	ÝD¨¬×(=Ò(=¸iÑ(HÔ(HÐ"HÐ"HÐ"HÐ"HÐ"HÐ"HÑIÔIˆØ×!Ò! &Ñ)Ô)Ð)r!   c                 óâ  — | j         r|                       ||¦  «        S i }d| j        j        v r}t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j                             ¦   «         ¦  «        d         |d<   d| j        j        v r“|j        st          d¦  «        ‚t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j                             ¦   «         ¦  «        d         |d<   d	| j        j        v r•|r|j        st          d
¦  «        ‚t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j        ¦  «        dk    rt          d¦  «        ‚t          |j                             ¦   «         ¦  «        d         |d	<   |S )z'Prepare variables for model invocation.rY   r   zHNo input keys are present in run.inputs but the prompt requires 'input'.r&   zWMultiple input keys are present in run.inputs. Please provide a map_variables function.rZ   zKNo output keys are present in run.outputs but the prompt requires 'output'.zYMultiple output keys are present in run.outputs. Please provide a map_variables function.r[   zMNo example or example outputs is provided but the prompt requires 'expected'.zQNo output keys are present in example.outputs but the prompt requires 'expected'.z]Multiple output keys are present in example.outputs. Please provide a map_variables function.)	r@   r`   rb   ÚlenÚinputsr;   ÚlistÚvaluesÚoutputs)rJ   re   rf   rm   s       r"   rh   zLLMEvaluator._prepare_variablesÞ   s*  € àÔð 	4Ø×%Ò% c¨7Ñ3Ô3Ð3àˆ	Ød”kÔ1Ð1Ð1Ý3”:‰Œ !Ò#Ð#Ý ð(ñô ð õ 3”:‰Œ !Ò#Ð#Ý ð0ñô ð õ "& c¤j×&7Ò&7Ñ&9Ô&9Ñ!:Ô!:¸1Ô!=ˆIgÑàt”{Ô2Ð2Ð2Ø”;ð Ý ð)ñô ð õ 3”;ÑÔ 1Ò$Ð$Ý ð)ñô ð õ 3”;ÑÔ 1Ò$Ð$Ý ð8ñô ð õ #' s¤{×'9Ò'9Ñ';Ô';Ñ"<Ô"<¸QÔ"?ˆIhÑà˜œÔ4Ð4Ð4Øð  '¤/ð Ý ð+ñô ð õ 7”?Ñ#Ô# qÒ(Ð(Ý ð+ñô ð õ 7”?Ñ#Ô# qÒ(Ð(Ý ð8ñô ð õ %)¨¬×)?Ò)?Ñ)AÔ)AÑ$BÔ$BÀ1Ô$EˆIjÑ!àÐr!   rZ   c                 óV  — t          | j        t          ¦  «        r:|d         }|                     dd¦  «        }t	          | j        j        ||¬¦  «        S t          | j        t          ¦  «        r:|d         }|                     dd¦  «        }t	          | j        j        ||¬¦  «        S dS )z1Parse the model output into an evaluation result.r0   r4   N)r   ÚvalueÚcomment)r   r0   rz   )r9   r)   r   Úgetr   r   r$   )rJ   rZ   ry   r4   r0   s        r"   rk   zLLMEvaluator._parse_output  sº   € ådÔ'Õ)?Ñ@Ô@ð 	Ø˜7”OˆEØ Ÿ*š* ]°DÑ9Ô9ˆKÝ#ØÔ%Ô)°Àðñ ô ð õ ˜Ô)Õ+@ÑAÔAð 	Ø˜7”OˆEØ Ÿ*š* ]°DÑ9Ô9ˆKÝ#ØÔ%Ô)°Àðñ ô ð ð	ð 	r!   )N)r   r   r   r   r	   r   r   r   r   r$   r   r   r   r   ri   rN   Úclassmethodr   rS   rI   r   r   r   rn   rq   rh   rk   r    r!   r"   r>   r>   L   sÈ  € € € € € Ø9Ð9ð MQØ"Ø&ð'Sð 'Sð 'Sð ˜s D¨¨s°C¨x¬Ô$9Ð9Ô:ð'Sð Ð2Ð4IÐIÔJð	'Sð
   ¨#¨x¸Ô/@Ð)AÀ4Ð)GÔ HÔIð'Sð ð'Sð ð'Sð 'Sð 'Sð 'SðR ð MQðð ð àðð ˜s D¨¨s°C¨x¬Ô$9Ð9Ô:ð	ð
 Ð2Ð4IÐIÔJðð   ¨#¨x¸Ô/@Ð)AÀ4Ð)GÔ HÔIðð ð ñ „[ðð:41à˜s D¨¨s°C¨x¬Ô$9Ð9Ô:ð41ð Ð2Ð4IÐIÔJð41ð   ¨#¨x¸Ô/@Ð)AÀ4Ð)GÔ HÔIð	41ð
 ð41ð 41ð 41ð 41ðl à59ð*ð *Øð*Ø!)¨'Ô!2ð*à	ÐÐ!2Ð2Ô	3ð*ð *ð *ñ „Yð*ð à59ð*ð *Øð*Ø!)¨'Ô!2ð*à	ÐÐ!2Ð2Ô	3ð*ð *ð *ñ „Yð*ð7 cð 7°H¸WÔ4Eð 7È$ð 7ð 7ð 7ð 7ðr Dð ¨UÐ3CÐEVÐ3VÔ-Wð ð ð ð ð ð r!   r>   N)r   Útypingr   r   r   r   r   r   r	   r
   Úpydanticr   Ú#langsmith._internal._beta_decoratorr   Úlangsmith.evaluationr   r   r   Úlangsmith.schemasr   r   r   r$   ri   r<   r>   r    r!   r"   ú<module>r‚      sd  ðØ MÐ Mà JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ JÐ Jà Ð Ð Ð Ð Ð à 9Ð 9Ð 9Ð 9Ð 9Ð 9Ø RÐ RÐ RÐ RÐ RÐ RÐ RÐ RÐ RÐ RØ *Ð *Ð *Ð *Ð *Ð *Ð *Ð *ð2ð 2ð 2ð 2ð 2˜Yñ 2ô 2ð 2ð2ð 2ð 2ð 2ð 2˜Iñ 2ô 2ð 2ð(ØÐ.Ð0EÐEÔFð(à	ð(ð (ð (ð (ðVXð Xð Xð Xð X<ñ Xô Xð Xð Xð Xr!   