
    Ng                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 d dl
mZmZ  e j        e          Z G d de          ZdS )    N)AnyCallableDictIteratorListOptional)Document)	BaseModelmodel_validatorc                       e Zd ZU dZdZeed<   dZeed<   dZe	ed<   dZ
eeegef                  ed	<   eed
<    ed          ededefd                        Zdee         fdZdee         fdZdS )TensorflowDatasetsav  Access to the TensorFlow Datasets.

    The Current implementation can work only with datasets that fit in a memory.

    `TensorFlow Datasets` is a collection of datasets ready to use, with TensorFlow
    or other Python ML frameworks, such as Jax. All datasets are exposed
    as `tf.data.Datasets`.
    To get started see the Guide: https://www.tensorflow.org/datasets/overview and
    the list of datasets: https://www.tensorflow.org/datasets/catalog/
                                               overview#all_datasets

    You have to provide the sample_to_document_function: a function that
       a sample from the dataset-specific format to the Document.

    Attributes:
        dataset_name: the name of the dataset to load
        split_name: the name of the split to load. Defaults to "train".
        load_max_docs: a limit to the number of loaded documents. Defaults to 100.
        sample_to_document_function: a function that converts a dataset sample
          to a Document

    Example:
        .. code-block:: python

            from langchain_community.utilities import TensorflowDatasets

            def mlqaen_example_to_document(example: dict) -> Document:
                return Document(
                    page_content=decode_to_str(example["context"]),
                    metadata={
                        "id": decode_to_str(example["id"]),
                        "title": decode_to_str(example["title"]),
                        "question": decode_to_str(example["question"]),
                        "answer": decode_to_str(example["answers"]["text"][0]),
                    },
                )

            tsds_client = TensorflowDatasets(
                    dataset_name="mlqa/en",
                    split_name="train",
                    load_max_docs=MAX_DOCS,
                    sample_to_document_function=mlqaen_example_to_document,
                )

     dataset_nametrain
split_named   load_max_docsNsample_to_document_functiondatasetbefore)modevaluesreturnc                    	 ddl }n# t          $ r t          d          w xY w	 ddl}n# t          $ r t          d          w xY w|d         t          d          |                    |d         |d         	          |d
<   |S )z7Validate that the python package exists in environment.r   Nz\Could not import tensorflow python package. Please install it with `pip install tensorflow`.znCould not import tensorflow_datasets python package. Please install it with `pip install tensorflow-datasets`.r   zmsample_to_document_function is None. Please provide a function that converts a dataset sample to  a Document.r   r   )splitr   )
tensorflowImportErrortensorflow_datasets
ValueErrorload)clsr   r   r   s       m/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/utilities/tensorflow_datasets.pyvalidate_environmentz'TensorflowDatasets.validate_environment?   s    	 	 	 	C  	
	&&&&& 	 	 	L  	
 /08   
 044>"&*> 5 
 
y s    !* Ac                 Z      fd j                              j                  D             S )zYDownload a selected dataset lazily.

        Returns: an iterator of Documents.

        c              3   P   K   | ] }j         	                     |          V  !d S )N)r   ).0sselfs     r"   	<genexpr>z/TensorflowDatasets.lazy_load.<locals>.<genexpr>c   sF       
 
/; ,,Q//;;;;
 
    )r   taker   r(   s   `r"   	lazy_loadzTensorflowDatasets.lazy_load]   sA    
 
 
 
\&&t'9::
 
 
 	
r*   c                 D    t          |                                           S )zMDownload a selected dataset.

        Returns: a list of Documents.

        )listr-   r,   s    r"   r    zTensorflowDatasets.loadi   s     DNN$$%%%r*   )__name__
__module____qualname____doc__r   str__annotations__r   r   intr   r   r   r   r	   r   r   classmethodr#   r   r-   r   r     r*   r"   r   r   
   s        , ,\ L#JM3HL(D683C*D!ELLLLLL_(###$ 3    [ $#8

8H- 

 

 

 

&d8n & & & & & &r*   r   )loggingtypingr   r   r   r   r   r   langchain_core.documentsr	   pydanticr
   r   	getLoggerr0   loggerr   r8   r*   r"   <module>r?      s     @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ - - - - - - / / / / / / / /		8	$	$e& e& e& e& e& e& e& e& e& e&r*   