
    Ng                         d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ  G d	 d
e          Z G d de          ZdS )    N)TextIOWrapper)Path)AnyDictIteratorListOptionalSequenceUnion)Document)
BaseLoader)detect_file_encodings)UnstructuredFileLoadervalidate_unstructured_versionc                       e Zd ZdZ	 	 	 	 	 ddddeeef         dee         dee         d	ee	         d
ee         de
dee         fdZdee         fdZdedee         fdZdS )	CSVLoadera  Load a `CSV` file into a list of Documents.

    Each document represents one row of the CSV file. Every row is converted
    into a key/value pair and outputted to a new line in the document's
    page_content.

    The source for each document loaded from csv is set to the value of the
    `file_path` argument for all documents by default.
    You can override this by setting the `source_column` argument to the
    name of a column in the CSV file.
    The source of each document will then be set to the value of the column
    with the name specified in `source_column`.

    Output Example:
        .. code-block:: txt

            column1: value1
            column2: value2
            column3: value3

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import CSVLoader

            loader = CSVLoader(file_path='./hw_200.csv',
                csv_args={
                'delimiter': ',',
                'quotechar': '"',
                'fieldnames': ['Index', 'Height', 'Weight']
            })

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}
    N F)content_columns	file_pathsource_columnmetadata_columnscsv_argsencodingautodetect_encodingr   c                l    || _         || _        || _        || _        |pi | _        || _        || _        dS )a  

        Args:
            file_path: The path to the CSV file.
            source_column: The name of the column in the CSV file to use as the source.
              Optional. Defaults to None.
            metadata_columns: A sequence of column names to use as metadata. Optional.
            csv_args: A dictionary of arguments to pass to the csv.DictReader.
              Optional. Defaults to None.
            encoding: The encoding of the CSV file. Optional. Defaults to None.
            autodetect_encoding: Whether to try to autodetect the file encoding.
            content_columns: A sequence of column names to use for the document content.
                If not present, use all columns that are not part of the metadata.
        N)r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   s           k/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/csv_loader.py__init__zCSVLoader.__init__c   sC    2 #* 0  B#6 .    returnc              #   h  K   	 t          | j        d| j                  5 }|                     |          E d {V  d d d            d S # 1 swxY w Y   d S # t          $ r}| j        r|t          | j                  }|D ]d}	 t          | j        d|j                  5 }|                     |          E d {V  	 d d d             n;# 1 swxY w Y   U# t          $ r Y aw xY wnt          d| j                   |Y d }~d S Y d }~d S d }~wt          $ r}t          d| j                   |d }~ww xY w)N )newliner   zError loading )	openr   r   _CSVLoader__read_fileUnicodeDecodeErrorr   r   RuntimeError	Exception)r   csvfileedetected_encodingsr   s        r   	lazy_loadzCSVLoader.lazy_load   s\     	Idnb4=III 5W++G4444444445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5! 	M 	M 	M' M%:4>%J%J" 2 ! !H!! NBAR   "$'+'7'7'@'@@@@@@@@!	" " " " " " " " " " " " " " " " "
 . ! ! ! !! ##DDN#D#DEE1L! ! ! ! ! !" " " " " "  	I 	I 	I@@@AAqH	Is   A A	A 	AA AA 
D1 D CC9CDC

CC
CD
CDCDD1D,,D1r)   c              #      K   t          j        |fi  j        }t          |          D ]\  }}	  j        | j                 nt           j                  }n&# t          $ r t          d j         d          w xY wd	                     fd|
                                D                       }||d} j        D ]/}	 ||         ||<   # t          $ r t          d| d          w xY wt          ||          V  d S )NzSource column 'z' not found in CSV file.
c           	   3   b  K   | ]\  }}j         r
|j         v rn	|j        v||                                n| dt          |t                    r|                                nCt          |t
                    r-d                    t          t          j        |                    n| V  d S )Nz: ,)r   r   strip
isinstancestrlistjoinmap).0kvr   s      r   	<genexpr>z(CSVLoader.__read_file.<locals>.<genexpr>   s       
  
  Aq +
  ----$"777 #$-QWWYYYQ 2 2a%%;.!'')))a&&,.388C	14E4E+F+F+F,-2 2 8777
  
 r   )sourcerowzMetadata column ')page_contentmetadata)csv
DictReaderr   	enumerater   r3   r   KeyError
ValueErrorr5   itemsr   r   )	r   r)   
csv_readerir<   r;   contentr>   cols	   `        r   __read_filezCSVLoader.__read_file   s     ^G==t}==

++ 	D 	DFAs	 )5 *++T^,, 
     Rd&8RRR   ii 
  
  
  
   IIKK	
  
  
  
 
G #)33H, X XX$'HHSMM X X X$%V%V%V%VWWWX(CCCCCCC9	D 	Ds   (A#A<CC+)Nr   NNF)__name__
__module____qualname____doc__r   r3   r   r	   r
   r   boolr   r   r   r,   r   r%   r   r   r   r   r      s       P Pj (,*,#'"&$)/ *,/ / /d#/  }/ #3-	/
 4./ 3-/ "/ "#/ / / /BI8H- I I I I*D= DXh5G D D D D D Dr   r   c                   @     e Zd ZdZ	 d	dededef fdZdefdZ xZ	S )
UnstructuredCSVLoadera|  Load `CSV` files using `Unstructured`.

    Like other
    Unstructured loaders, UnstructuredCSVLoader can be used in both
    "single" and "elements" mode. If you use the loader in "elements"
    mode, the CSV file will be a single Unstructured Table element.
    If you use the loader in "elements" mode, an HTML representation
    of the table will be available in the "text_as_html" key in the
    document metadata.

    Examples
    --------
    from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader

    loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
    docs = loader.load()
    singler   modeunstructured_kwargsc                 `    t          d            t                      j        d||d| dS )a  

        Args:
            file_path: The path to the CSV file.
            mode: The mode to use when loading the CSV file.
              Optional. Defaults to "single".
            **unstructured_kwargs: Keyword arguments to pass to unstructured.
        z0.6.8)min_unstructured_version)r   rR   Nr   )r   superr   )r   r   rR   rS   	__class__s       r   r   zUnstructuredCSVLoader.__init__   sA     	&wGGGGO94OO;NOOOOOr   r    c                 6    ddl m}  |dd| j        i| j        S )Nr   )partition_csvfilenamer   )unstructured.partition.csvrY   r   rS   )r   rY   s     r   _get_elementsz#UnstructuredCSVLoader._get_elements   s4    <<<<<<}QQdnQ8PQQQr   )rQ   )
rJ   rK   rL   rM   r3   r   r   r   r\   __classcell__)rW   s   @r   rP   rP      s         & +3P PP$'PKNP P P P P PRt R R R R R R R Rr   rP   )r?   ior   pathlibr   typingr   r   r   r   r	   r
   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   ,langchain_community.document_loaders.helpersr   1langchain_community.document_loaders.unstructuredr   r   r   rP   r   r   r   <module>re      s@   



             G G G G G G G G G G G G G G G G G G - - - - - - @ @ @ @ @ @ N N N N N N       gD gD gD gD gD
 gD gD gDT$R $R $R $R $R2 $R $R $R $R $Rr   