
    Ng=                     F    d Z ddlmZmZmZ ddlmZ  G d de          ZdS )z+Load Documents from Docusarus Documentation    )AnyListOptional)SitemapLoaderc                   \     e Zd ZdZ	 d
dedeee                  def fdZdedefd	Z	 xZ
S )DocusaurusLoadera  Load from Docusaurus Documentation.

    It leverages the SitemapLoader to loop through the generated pages of a
    Docusaurus Documentation website and extracts the content by looking for specific
    HTML tags. By default, the parser searches for the main content of the Docusaurus
    page, which is normally the <article>. You can also define your own
    custom HTML tags by providing them as a list, for example: ["div", ".main", "a"].
    Nurlcustom_html_tagskwargsc                     |                     d          s| d}|pdg| _         t                      j        |fd|                     d          p| j        i| dS )aq  Initialize DocusaurusLoader

        Args:
            url: The base URL of the Docusaurus website.
            custom_html_tags: Optional custom html tags to extract content from pages.
            kwargs: Additional args to extend the underlying SitemapLoader, for example:
                filter_urls, blocksize, meta_function, is_local, continue_on_failure
        is_localz/sitemap.xmlzmain articleparsing_functionN)getr
   super__init___parsing_function)selfr	   r
   r   	__class__s       k/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/docusaurus.pyr   zDocusaurusLoader.__init__   s     zz*%% 	'&&&C 0 D^4D	
 	
#ZZ(:;;Ut?U	
 	
 	
 	
 	
 	
    contentreturnc                     |                     d                    | j                            }|D ]}||vr|                                 t	          |                                          S )z0Parses specific elements from a Docusaurus page.,)selectjoinr
   	decomposestrget_text)r   r   relevant_elementselements       r   r   z"DocusaurusLoader._parsing_function+   sk    #NN388D4I+J+JKK( 	$ 	$G///!!###7##%%&&&r   )N)__name__
__module____qualname____doc__r   r   r   r   r   r   __classcell__)r   s   @r   r   r      s          15
 

 #49-
 	
 
 
 
 
 
2' ' ' ' ' ' ' ' ' 'r   r   N)r%   typingr   r   r   ,langchain_community.document_loaders.sitemapr   r    r   r   <module>r*      so    1 1 & & & & & & & & & & F F F F F F+' +' +' +' +'} +' +' +' +' +'r   