
    Ngc                     "   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dedefd	Zd
ej        dedee         fdZedddddde	ej                 dedee         dee         dededej        fd            Z dededeej                 fdZ! ed          Z" ed          Z#dee"         dee#         dee
e"e#f                  fdZ$edddded e%d!ee&         dee         ddf
d"            Z'dS )#zfBeta utility functions to assist in common eval workflows.

These functions may change in the future.
    N)DefaultDictListOptionalSequenceTupleTypeVar)
evaluation)	warn_beta)Clientrun_dictid_mapc                 4   | d         }|                                 D ]5\  }}|                    t          |          t          |                    }6|| d<   |                     d          r|| d                  | d<   |                     d          si | d<   | S )a  Convert the IDs in the run dictionary using the provided ID map.

    Parameters:
    - run_dict (dict): The dictionary representing a run.
    - id_map (dict): The dictionary mapping old IDs to new IDs.

    Returns:
    - dict: The updated run dictionary.
    dotted_orderparent_run_idextra)itemsreplacestrget)r   r   dokvs        Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/langsmith/beta/_evals.py_convert_idsr      s     
.	!B ( (1ZZAA''!H^||O$$ F$*8O+D$E!<<   O    rootrun_to_example_mapreturnc                    | g}t          j                    }| j        |ig }|r|                                }|                    h d          }                    |d         t          j                              |d         <   |d                  |d<   |d                  |d<   |j        r|                    |j                   |                    |           |fd|D             }|| j	                 |d         d<   |S )a&  Convert the root run and its child runs to a list of dictionaries.

    Parameters:
    - root (ls_schemas.Run): The root run to convert.
    - run_to_example_map (dict): The dictionary mapping run IDs to example IDs.

    Returns:
    - List[dict]: The list of converted run dictionaries.
    >   
session_idchild_run_idsparent_run_ids)excludeidtrace_idc                 0    g | ]}t          |          S  )r   ).0rr   s     r   
<listcomp>z%_convert_root_run.<locals>.<listcomp>@   s#    777!l1f%%777r   r   reference_example_id)
uuiduuid4r%   popdictr   
child_runsextendappendr$   )	r   r   runs_r%   resultssrcsrc_dictresultr   s	           @r   _convert_root_runr8   )   s    FEz||HmX&FG
 !iikk88$U$U$U8VV!'HTNDJLL!I!Ix~/%hz&:;> 	)LL(((x     ! 8777w777F(:47(CF1I$%Mr   F)test_project_nameclientload_child_runsinclude_outputsrunsdataset_namer9   r:   r;   r<   c                j   | st          d|            pt          j                                        |          }|rd | D             nd}                    d | D             |d | D             |j                   s| }nfd| D             }|p"d	t          j                    j        dd
          }t          
                    |                    }	d |	D             |	d         j        r|	d         j        n|	d         j        }
fd|D             }                    ||j        d|
                                d          }|D ]} j        di |d|i                     |j        t"          j                            t"          j        j                            }|S )a  Convert the following runs to a dataset + test.

    This makes it easy to sample prod runs into a new regression testing
    workflow and compare against a candidate system.

    Internally, this function does the following:
        1. Create a dataset from the provided production run inputs.
        2. Create a new test project.
        3. Clone the production runs and re-upload against the dataset.

    Parameters:
    - runs (Sequence[ls_schemas.Run]): A sequence of runs to be executed as a test.
    - dataset_name (str): The name of the dataset to associate with the test runs.
    - client (Optional[Client]): An optional LangSmith client instance. If not provided,
        a new client will be created.
    - load_child_runs (bool): Whether to load child runs when copying runs.
        Defaults to False.

    Returns:
    - ls_schemas.TracerSession: The project containing the cloned runs.

    Examples:
    --------
    .. code-block:: python

        import langsmith
        import random

        client = langsmith.Client()

        # Randomly sample 100 runs from a prod project
        runs = list(client.list_runs(project_name="My Project", execution_order=1))
        sampled_runs = random.sample(runs, min(len(runs), 100))

        runs_as_test(runs, dataset_name="Random Runs")

        # Select runs named "extractor" whose root traces received good feedback
        runs = client.list_runs(
            project_name="<your_project>",
            filter='eq(name, "extractor")',
            trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
        )
        runs_as_test(runs, dataset_name="Extraction Good")
    z1Expected a non-empty sequence of runs. Received: )r>   c                     g | ]	}|j         
S r'   )outputsr(   r)   s     r   r*   z(convert_runs_to_test.<locals>.<listcomp>   s    '''Qqy'''r   Nc                     g | ]	}|j         
S r'   )inputsrB   s     r   r*   z(convert_runs_to_test.<locals>.<listcomp>   s    '''Q'''r   c                     g | ]	}|j         
S r'   )r$   rB   s     r   r*   z(convert_runs_to_test.<locals>.<listcomp>   s    ++++++r   )rD   rA   source_run_ids
dataset_idc                 H    g | ]}                     |j                   S ))r;   )read_runr$   )r(   r)   r:   r;   s     r   r*   z(convert_runs_to_test.<locals>.<listcomp>   s8     
 
 
GHFOOAD/OBB
 
 
r   zprod-baseline-   c                 (    i | ]}|j         |j        S r'   )source_run_idr$   )r(   es     r   
<dictcomp>z(convert_runs_to_test.<locals>.<dictcomp>   s    BBBA!/14BBBr   r   c                 :    g | ]}t          |          D ]}|S r'   )r8   )r(   root_runr   r   s      r   r*   z(convert_runs_to_test.<locals>.<listcomp>   sK       )(4FGG   	   r   zprod-baseline)whichdataset_version)project_namereference_dataset_idmetadatarS   )tz)end_timer'   )
ValueErrorrtget_cached_clientcreate_datasetcreate_examplesr$   r,   r-   hexlistlist_examplesmodified_at
created_atcreate_project	isoformat
create_runupdate_projectdatetimenowtimezoneutc)r=   r>   r9   r:   r;   r<   dsrA   runs_to_copyexamplesrR   	to_createprojectnew_run_r   s      ``          @r   convert_runs_to_testrq   E   s[   l  YWtWWXXX-r+--F			L		9	9B+:D''$''''G
''$'''++d+++5	      

 
 
 
 
LP
 
 
 *T-Tdjll>NrPQr>R-T-TF((l(CCDDHBBBBB#+A;#:V@V    $  I ##&U$.88::
 
 $  G  E EDDGDD2CDDDDD
X.22h6G6K2LL 	 	 	A Nr   rS   c                 r   |                     |           }t          j        t                    }g }i }|D ]I}|j        !||j                                     |           n|                    |           |||j        <   J|                                D ]"\  }}t          |d           ||         _	        #|S )N)rS   c                     | j         S N)r   )r)   s    r   <lambda>z%_load_nested_traces.<locals>.<lambda>   s    q~ r   )key)
	list_runscollectionsdefaultdictr^   r   r2   r$   r   sortedr0   )	rS   r:   r=   treemapr4   all_runsrunrun_idr0   s	            r   _load_nested_tracesr      s    66D<G<S= =G GH  (C%&--c2222NN3%mmoo W W
&,Z=U=U&V&V&V##Nr   TUlist1list2c                 F    t          t          j        | |                    S rt   )r^   	itertoolsproduct)r   r   s     r   _outer_productr      s    	!%//000r   
   )max_concurrencyr:   
evaluatorsr   c          
         ddl m} g }|D ]}t          |t          j                  r|                    |           2t          |          r(|                    t          j        |                     it          dt          |                     |pt          j                    }t          | |          } ||          5 } |j        |j        gt          t!          ||           R  }	ddd           n# 1 swxY w Y   |	D ]}
dS )a  Compute test metrics for a given test name using a list of evaluators.

    Args:
        project_name (str): The name of the test project to evaluate.
        evaluators (list): A list of evaluators to compute metrics with.
        max_concurrency (Optional[int], optional): The maximum number of concurrent
            evaluations. Defaults to 10.
        client (Optional[Client], optional): The client to use for evaluations.
            Defaults to None.

    Returns:
        None: This function does not return any value.
    r   )ContextThreadPoolExecutorz5Evaluation not yet implemented for evaluator of type )max_workersN)	langsmithr   
isinstancels_evalRunEvaluatorr2   callablerun_evaluatorNotImplementedErrortyperY   rZ   r   mapevaluate_runzipr   )rS   r   r   r:   r   evaluators_functracesexecutorr4   rp   s              r   compute_test_metricsr      s   * 433333.0K  dG011 	t$$$$d^^ 	w4T::;;;;%TT

TT   -r+--F v66F	"	"	?	?	? 
8(,
"%~fk'J'J"K
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
    s   )C99C= C=)(__doc__rx   rf   r   r,   typingr   r   r   r   r   r   langsmith.run_trees	run_treesrY   langsmith.schemasschemas
ls_schemasr   r	   r   #langsmith._internal._beta_decoratorr
   langsmith.clientr   r/   r   Runr8   r   boolTracerSessionrq   r   r   r   r   r^   intr   r'   r   r   <module>r      s   
           H H H H H H H H H H H H H H H H             & & & & & & + + + + + + 9 9 9 9 9 9 # # # # # #4     ,JN  d    8 
 (,#!!e e e
:>
"e e  }	e
 Ve e e e e e ePc 6 d:>>R    $ GCLLGCLL1$q' 1$q' 1d5A;6G 1 1 1 1 
 &(#' ' '' ' c]	'
 V' 
' ' ' ' ' 'r   