
    Ng                        d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ erd dlmZ eeef         Ze	d	         Z G d
 de          ZdS )    )annotations)Path)TYPE_CHECKINGAnyIteratorListLiteralOptionalSequenceUnion)Document)BaseBlobParser
BaseLoader)
BlobLoaderFileSystemBlobLoader)
get_parser)TextSplitterdefaultc                  n    e Zd ZdZd'dZd(d
Z	 d)d*dZedddddddd+d#            Ze	d,d&            Z
dS )-GenericLoadera  Generic Document Loader.

    A generic document loader that allows combining an arbitrary blob loader with
    a blob parser.

    Examples:

        Parse a specific PDF file:

        .. code-block:: python

            from langchain_community.document_loaders import GenericLoader
            from langchain_community.document_loaders.parsers.pdf import PyPDFParser

            # Recursively load all text files in a directory.
            loader = GenericLoader.from_filesystem(
                "my_lovely_pdf.pdf",
                parser=PyPDFParser()
            )

       .. code-block:: python

            from langchain_community.document_loaders import GenericLoader
            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader


            loader = GenericLoader.from_filesystem(
                path="path/to/directory",
                glob="**/[!.]*",
                suffixes=[".pdf"],
                show_progress=True,
            )

            docs = loader.lazy_load()
            next(docs)

    Example instantiations to change which files are loaded:

    .. code-block:: python

        # Recursively load all text files in a directory.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/*.txt")

        # Recursively load all non-hidden files in a directory.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/[!.]*")

        # Load all files in a directory without recursion.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="*")

    Example instantiations to change which parser is used:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.pdf import PyPDFParser

        # Recursively load all text files in a directory.
        loader = GenericLoader.from_filesystem(
            "/path/to/dir",
            glob="**/*.pdf",
            parser=PyPDFParser()
        )

    blob_loaderr   blob_parserr   returnNonec                "    || _         || _        dS )zA generic document loader.

        Args:
            blob_loader: A blob loader which knows how to yield blobs
            blob_parser: A blob parser which knows how to parse blobs into documents
        N)r   r   )selfr   r   s      h/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/generic.py__init__zGenericLoader.__init__a   s     '&    Iterator[Document]c              #     K   | j                                         D ]"}| j                            |          E d{V  #dS )z>Load documents lazily. Use this when working at a large scale.N)r   yield_blobsr   
lazy_parse)r   blobs     r   	lazy_loadzGenericLoader.lazy_loado   s\       $0022 	9 	9D'2248888888888	9 	9r   Ntext_splitterOptional[TextSplitter]List[Document]c                     t          d          )z1Load all documents and split them into sentences.zLoading and splitting is not yet implemented for generic loaders. When they will be implemented they will be added via the initializer. This method should not be used going forward.NotImplementedError)r   r&   s     r   load_and_splitzGenericLoader.load_and_splitv   s     "<
 
 	
r   z**/[!.]* Fr   )globexcludesuffixesshow_progressparserparser_kwargspath	_PathLiker.   strr/   Sequence[str]r0   Optional[Sequence[str]]r1   boolr2   Union[DEFAULT, BaseBlobParser]r3   Optional[dict]c                   t          |||||          }t          |t                    rF|dk    r0	  | j        di |pi }	n1# t          $ r t          |          }	Y nw xY wt          |          }	n|}	 | ||	          S )a  Create a generic document loader using a filesystem blob loader.

        Args:
            path: The path to the directory to load documents from OR the path to a
                  single file to load. If this is a file, glob, exclude, suffixes
                    will be ignored.
            glob: The glob pattern to use to find documents.
            suffixes: The suffixes to use to filter documents. If None, all files
                      matching the glob will be loaded.
            exclude: A list of patterns to exclude from the loader.
            show_progress: Whether to show a progress bar or not (requires tqdm).
                           Proxies to the file system loader.
            parser: A blob parser which knows how to parse blobs into documents,
                    will instantiate a default parser if not provided.
                    The default can be overridden by either passing a parser or
                    setting the class attribute `blob_parser` (the latter
                    should be used with inheritance).
            parser_kwargs: Keyword arguments to pass to the parser.

        Returns:
            A generic document loader.
        )r.   r/   r0   r1   r   r-   )r   
isinstancer6   r   r+   )
clsr4   r.   r/   r0   r1   r2   r3   r   r   s
             r   from_filesystemzGenericLoader.from_filesystem   s    D +'
 
 
 fc"" 	!""5"0#."I"IM4GR"I"IKK* 5 5 5",V"4"4KKK5 )00 Ks;,,,s   A AAkwargsr   c                     t                      )zBOverride this method to associate a default parser with the class.r*   )r@   s    r   r   zGenericLoader.get_parser   s     "###r   )r   r   r   r   r   r   )r   r    )N)r&   r'   r   r(   )r4   r5   r.   r6   r/   r7   r0   r8   r1   r9   r2   r:   r3   r;   r   r   )r@   r   r   r   )__name__
__module____qualname____doc__r   r%   r,   classmethodr?   staticmethodr   r-   r   r   r   r       s        > >@' ' ' '9 9 9 9 7;
 
 
 
 
 
 !#,0#1:(,4- 4- 4- 4- 4- [4-l $ $ $ \$ $ $r   r   N)
__future__r   pathlibr   typingr   r   r   r   r	   r
   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   r   1langchain_community.document_loaders.blob_loadersr   r   5langchain_community.document_loaders.parsers.registryr   langchain_text_splittersr   r6   r5   DEFAULTr   r-   r   r   <module>rQ      sf   " " " " " "      	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 . - - - - - P P P P P P P P        M L L L L L 6555555#t)	
)
Z$ Z$ Z$ Z$ Z$J Z$ Z$ Z$ Z$ Z$r   