
    NgB                     b    d dl mZmZmZmZ d dlmZmZ d dlm	Z	 d dl
mZ  G d de          ZdS )    )AnyIteratorListOptional)urljoinurlparse)Document)WebBaseLoaderc                        e Zd ZdZ	 	 	 	 	 ddededee         d	ed
edef fdZdee	         fdZ
	 ddedee         dee	         fdZdedee         fdZ xZS )GitbookLoaderztLoad `GitBook` data.

    1. load from either a single page, or
    2. load all (relative) paths in the navbar.
    FNmainTweb_pageload_all_pathsbase_urlcontent_selectorcontinue_on_failureshow_progressc                     |p|| _         | j                             d          r| j         dd         | _         |r
| j          d}t                                          |f||           || _        || _        dS )a  Initialize with web page and whether to load all paths.

        Args:
            web_page: The web page to load or the starting point from where
                relative paths are discovered.
            load_all_paths: If set to True, all relative paths in the navbar
                are loaded instead of only `web_page`.
            base_url: If `load_all_paths` is True, the relative paths are
                appended to this base url. Defaults to `web_page`.
            content_selector: The CSS selector for the content to load.
                Defaults to "main".
            continue_on_failure: whether to continue loading the sitemap if an error
                occurs loading a url, emitting a warning instead of raising an
                exception. Setting this to True makes the loader more robust, but also
                may result in missing data. Default: False
            show_progress: whether to show a progress bar while loading. Default: True
        /Nz/sitemap.xml)	web_pathsr   r   )r   endswithsuper__init__r   r   )selfr   r   r   r   r   r   	__class__s          h/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/gitbook.pyr   zGitbookLoader.__init__   s    4 !,H=!!#&& 	/ M#2#.DM 	6-555Hk 3' 	 	
 	
 	

 - 0    returnc              #      K    j         r                                 }                     |          } fd|D             }                     |          }t	          ||          D ]!\  }}                     ||          }|r|V  "dS                                  }                     | j                  }|r|V  dS dS )z(Fetch text from one single GitBook page.c                 :    g | ]}t          j        |          S  )r   r   ).0pathr   s     r   
<listcomp>z+GitbookLoader.lazy_load.<locals>.<listcomp>=   s%    LLLTGDM400LLLr   N)r   scrape
_get_paths
scrape_allzip_get_documentweb_path)r   	soup_inforelative_pathsurls
soup_infosurldocs   `      r   	lazy_loadzGitbookLoader.lazy_load8   s       	I!__Y77NLLLL^LLLD..J"%j$"7"7  	3((C88 III  I$$Y>>C 					 r   soup
custom_urlc                    |                     | j                  }|sdS |                    d                                          }|                     d          }|r|j        nd}|p| j        |d}t          ||          S )z,Fetch content from page and return Document.N
)	separatorh1 )sourcetitle)page_contentmetadata)findr   get_textstriptextr+   r	   )r   r3   r4   page_content_rawcontenttitle_if_existsr;   r=   s           r   r*   zGitbookLoader._get_documentJ   s      99T%:;; 	4"++d+;;AACC*//55(7?$$R(9DMEJJWx@@@@r   c                 @    d |                     d          D             S )z'Fetch all relative paths in the navbar.c                 @    g | ]}t          |j                  j        S r"   )r   rA   r$   )r#   locs     r   r%   z,GitbookLoader._get_paths.<locals>.<listcomp>Y   s%    HHHC""'HHHr   rG   )find_all)r   r3   s     r   r'   zGitbookLoader._get_pathsW   s"    HH4==3G3GHHHHr   )FNr   FT)N)__name__
__module____qualname____doc__strboolr   r   r   r	   r2   r   r*   r   r'   __classcell__)r   s   @r   r   r   	   s3          %"& &$)"&1 &1&1 &1 3-	&1
 &1 "&1 &1 &1 &1 &1 &1 &1P8H-    & 6:A AA%-c]A	(	A A A AIs ItCy I I I I I I I Ir   r   N)typingr   r   r   r   urllib.parser   r   langchain_core.documentsr	   -langchain_community.document_loaders.web_baser
   r   r"   r   r   <module>rT      s    0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * - - - - - - G G G G G GPI PI PI PI PIM PI PI PI PI PIr   