
    Ng                         d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ  e j        e          Z G d de          ZdS )	    N)Path)AnyIteratorListMappingOptional)Document)
BaseLoader)BibtexparserWrapperc                       e Zd ZdZdddddddedee         d	ee         d
ee         dedefdZ	de
eef         dee         fdZdee         fdZdS )BibtexLoadera  Load a `bibtex` file.

    Each document represents one entry from the bibtex file.

    If a PDF file is present in the `file` bibtex field, the original PDF
    is loaded into the document text. If no such file entry is present,
    the `abstract` field is used instead.
    Ni  Fz
[^:]+\.pdf)parsermax_docsmax_content_charsload_extra_metadatafile_pattern	file_pathr   r   r   r   r   c                    || _         |pt                      | _        || _        || _        || _        t          j        |          | _        dS )a  Initialize the BibtexLoader.

        Args:
            file_path: Path to the bibtex file.
            parser: The parser to use. If None, a default parser is used.
            max_docs: Max number of associated documents to load. Use -1 means
                           no limit.
            max_content_chars: Maximum number of characters to load from the PDF.
            load_extra_metadata: Whether to load extra metadata from the PDF.
            file_pattern: Regex pattern to match the file name in the bibtex.
        N)	r   r   r   r   r   r   recompile
file_regex)selfr   r   r   r   r   r   s          g/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/bibtex.py__init__zBibtexLoader.__init__   sJ    * #5 3 5 5 !2#6 *\22    entryreturnc                    dd l }t          | j                  j        }| j                            |                    dd                    }|sd S g }|D ]}	 |                    ||z            5 }|                    d |D                        d d d            n# 1 swxY w Y   S# t          $ r$}t                              |           Y d }~|d }~ww xY wd                    |          p|                    dd          }	| j        r|	d | j                 }	| j                            || j                  }
t#          |	|
          S )	Nr   file c              3   >   K   | ]}|                                 V  d S )N)get_text).0pages     r   	<genexpr>z+BibtexLoader._load_entry.<locals>.<genexpr>@   s*       ? ?T ? ? ? ? ? ?r   
abstract)
load_extra)page_contentmetadata)fitzr   r   parentr   findallgetopenextendFileNotFoundErrorloggerdebugjoinr   r   get_metadatar   r	   )r   r   r+   
parent_dir
file_namestexts	file_namefecontentr*   s              r   _load_entryzBibtexLoader._load_entry4   s   $.))0
_,,UYYvr-B-BCC
 	4# 	  	 I YYzI566 @!LL ? ?Q ? ? ????@ @ @ @ @ @ @ @ @ @ @ @ @ @ @$      Q ))E""?eii
B&?&?! 	86 667G;++Ed>V+WW 
 
 
 	
s<   B'/ BB'B	B'"B	#B''
C1CCc              #      K   	 ddl }n# t          $ r t          d          w xY w| j                            | j                  }| j        r|d| j                 }|D ]}|                     |          }|r|V  dS )a  Load bibtex file using bibtexparser and get the article texts plus the
        article metadata.
        See https://bibtexparser.readthedocs.io/en/master/

        Returns:
            a list of documents with the document.page_content in text format
        r   NzGPyMuPDF package not found, please install it with `pip install pymupdf`)r+   ImportErrorr   load_bibtex_entriesr   r   r=   )r   r+   entriesr   docs        r   	lazy_loadzBibtexLoader.lazy_loadL   s      	KKKK 	 	 	(  	 +11$.AA= 	/oo.G 	 	E""5))C 				 	s   	 #)__name__
__module____qualname____doc__strr   r   intboolr   r   r   r	   r=   r   rC    r   r   r   r      s          15"&+0$))3 3 33 ,-	3
 3-3 $C=3 "3 3 3 3 38
c!2 
x7I 
 
 
 
08H-      r   r   )loggingr   pathlibr   typingr   r   r   r   r   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   $langchain_community.utilities.bibtexr   	getLoggerrD   r2   r   rK   r   r   <module>rS      s     				       9 9 9 9 9 9 9 9 9 9 9 9 9 9 - - - - - - @ @ @ @ @ @ D D D D D D		8	$	$T T T T T: T T T T Tr   