
    Ng                       U d dl mZ d dlZd dlZd dlmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZ d d	lmZmZ d
Zded<    eej                  e	 ddddddd d                        Z G d d          ZdS )!    )annotationsN)IOAnyIterator)add_chunking_strategy)	HtmlTable)ElementElementMetadataTable)FileType)apply_metadataget_last_modified_date)is_temp_file_pathlazypropertycsvstrDETECTION_ORIGINFT)fileencodinginclude_headerinfer_table_structurefilename
str | Noner   IO[bytes] | Noner   r   boolr   kwargsr   returnlist[Element]c                  t                               | ||||          }|                                5 }t          j        ||j        |j        |          }ddd           n# 1 swxY w Y   t          j        |	                    d|d                    }t          | |j        |r|j        nd          }	t          |j        |	t                    gS )	aj  Partitions Microsoft Excel Documents in .csv format into its document elements.

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    encoding
        The encoding method used to decode the text input. If None, utf-8 will be used.
    include_header
        Determines whether or not header info info is included in text and medatada.text_as_html.
    infer_table_structure
        If True, any Table elements that are extracted will also have a metadata field
        named "text_as_html" where the table's text content is rendered into an html string.
        I.e., rows and cells are preserved.
        Whether True or False, the "text" field is always present in any Table element
        and is the text content of the table (no structure).
    	file_pathr   r   r   r   )headersepr   NF )indexr"   na_rep)r   last_modifiedtext_as_html)textmetadatadetection_origin)_CsvPartitioningContextloadopenpdread_csvr"   	delimiterr   from_html_textto_htmlr
   r'   htmlr   r)   r   )
r   r   r   r   r   r   ctx	dataframe
html_tabler*   s
             V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/csv.pypartition_csvr9      s2   < "
&
&%3 '  C 
 _tKSZS]U]^^^	_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ )nRHH J '(=GZ__4  H zL\]]]^^s   #A##A'*A'c                      e Zd ZdZ	 	 	 	 	 dddZedd            Zedd            Zedd            Z	edd            Z
ej        dd            ZddZdS )r,   zEncapsulates the partitioning-run details.

    Provides access to argument values and especially encapsulates computation of values derived
    from those values so they don't obscure the core partitioning logic.
    NFTr!   r   r   r   r   r   r   r   c                L    || _         || _        || _        || _        || _        d S N)
_file_path_file	_encoding_include_header_infer_table_structure)selfr!   r   r   r   r   s         r8   __init__z _CsvPartitioningContext.__init__Q   s/     $
!-&;###    r   c                F     | |||||                                           S )Nr    )	_validate)clsr!   r   r   r   r   s         r8   r-   z_CsvPartitioningContext.load_   s7     s)"7
 
 
 )++	rD   c                T    t          j                    }d}                                 5 }d                     fd|                    |          D                       }ddd           n# 1 swxY w Y   	 |                    |d          j        S # t           j        $ r Y dS w xY w)zThe CSV delimiter, nominally a comma ",".

        `None` for a single-column CSV file which naturally has no delimiter.
        i   
c              3  P   K   | ] }|                     j        pd           V  !dS )zutf-8N)decoder?   ).0lnrB   s     r8   	<genexpr>z4_CsvPartitioningContext.delimiter.<locals>.<genexpr>{   sG        9;		$.3G44     rD   Nz,;)
delimiters)r   Snifferr.   join	readlinessniffr1   Error)rB   sniffer	num_bytesr   datas   `    r8   r1   z!_CsvPartitioningContext.delimiterp   s     +--	YY[[ 	D99    ?C~~i?X?X    D	 	 	 	 	 	 	 	 	 	 	 	 	 	 		==$=77AAy 	 	 	44	s#   5A,,A03A08B B'&B'
int | Nonec                    | j         rdndS )z5Identifies the header row, if any, to Pandas, by idx.r   N)r@   rB   s    r8   r"   z_CsvPartitioningContext.header   s     (2qqd2rD   c                d    | j         rt          | j                   rdnt          | j                   S )zHThe best last-modified date available, None if no sources are available.N)r=   r   r   rZ   s    r8   r'   z%_CsvPartitioningContext.last_modified   s6    
 ?9&7&H&H9DD'88	
rD   Iterator[IO[bytes]]c              #     K   | j         r4t          | j         d          5 }|V  ddd           dS # 1 swxY w Y   dS | j        }|J |                    d           |V  |                    d           dS )au  Encapsulates complexity of dealing with file-path or file-like-object.

        Provides an `IO[bytes]` object as the "common-denominator" document source.

        Must be used as a context manager using a `with` statement:

            with self._file as file:
                do things with file

        File is guaranteed to be at read position 0 when called.
        rbNr   )r=   r.   r>   seek)rB   fr   s      r8   r.   z_CsvPartitioningContext.open   s       ? 		dot,,                   :D###IIaLLLJJJIIaLLLLLs   155c                @    | j         | j        t          d          | S )z!Raise on invalid argument values.Nz5either file-path or file-like object must be provided)r=   r>   
ValueErrorrZ   s    r8   rF   z!_CsvPartitioningContext._validate   s%    ?"tz'9TUUUrD   )NNNFT)
r!   r   r   r   r   r   r   r   r   r   )r!   r   r   r   r   r   r   r   r   r   r   r,   )r   r   )r   rX   )r   r\   )r   r,   )__name__
__module____qualname____doc__rC   classmethodr-   r   r1   r"   r'   
contextlibcontextmanagerr.   rF    rD   r8   r,   r,   J   s          !%!%#$&*< < < < <    [     \( 3 3 3 \3 
 
 
 \
    .     rD   r,   r<   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   rh   r   typingr   r   r   pandasr/   unstructured.chunkingr   unstructured.common.html_tabler   unstructured.documents.elementsr	   r
   r   unstructured.file_utils.modelr   &unstructured.partition.common.metadatar   r   unstructured.utilsr   r   r   __annotations__CSVr9   r,   rj   rD   r8   <module>rv      s   " " " " " " "     



 $ $ $ $ $ $ $ $ $ $     7 7 7 7 7 7 4 4 4 4 4 4 K K K K K K K K K K 2 2 2 2 2 2 Y Y Y Y Y Y Y Y > > > > > > > >      2_ " "&2_ 2_ 2_ 2_ 2_  2_je e e e e e e e e erD   