
    Ng                     ^    d dl mZmZmZmZ d dlmZ d dlmZ d dl	m
Z
  G d de          ZdS )    )AnyIteratorListOptional)Document)
BaseLoader)ArxivAPIWrapperc                   f    e Zd ZdZ	 d
dedee         defdZde	e
         fdZdee
         fd	ZdS )ArxivLoadera  Load a query result from `Arxiv`.
    The loader converts the original PDF format into the text.

    Setup:
        Install ``arxiv`` and ``PyMuPDF`` packages.
        ``PyMuPDF`` transforms PDF files downloaded from the arxiv.org site
        into the text format.

        .. code-block:: bash

            pip install -U arxiv pymupdf


    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning",
                # load_max_docs=2,
                # load_all_available_meta=False
            )

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python
            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }


    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Use summaries of articles as docs:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning"
            )

            docs = loader.get_summaries_as_docs()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Pre-trained language models (LMs) are able to perform complex reasoning
            without explicit fine-tuning
            {
                'Entry ID': 'http://arxiv.org/abs/2402.03268v2',
                'Published': datetime.date(2024, 2, 29),
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang'
            }
    Nquerydoc_content_chars_maxkwargsc                 :    || _         t          dd|i|| _        dS )a$  Initialize with search query to find documents in the Arxiv.
        Supports all arguments of `ArxivAPIWrapper`.

        Args:
            query: free text which used to find documents in the Arxiv
            doc_content_chars_max: cut limit for the length of a document's content
        r   N )r   r	   client)selfr   r   r   s       f/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/arxiv.py__init__zArxivLoader.__init__   s5     
% 
 
"7
;A
 
    returnc              #   T   K   | j                             | j                  E d{V  dS )zLazy load Arvix documentsN)r   	lazy_loadr   r   s    r   r   zArxivLoader.lazy_load   s6      ;((44444444444r   c                 @    | j                             | j                  S )zBUses papers summaries as documents rather than source Arvix papers)r   get_summaries_as_docsr   r   s    r   r   z!ArxivLoader.get_summaries_as_docs   s    {00<<<r   )N)__name__
__module____qualname____doc__strr   intr   r   r   r   r   r   r   r   r   r   r   r   	   s        w wt BF
 

19#
QT
 
 
 
 58H- 5 5 5 5=tH~ = = = = = =r   r   N)typingr   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   #langchain_community.utilities.arxivr	   r   r   r   r   <module>r&      s    0 0 0 0 0 0 0 0 0 0 0 0 - - - - - - @ @ @ @ @ @ ? ? ? ? ? ?P= P= P= P= P=* P= P= P= P= P=r   