
    Ng)                     Z    d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	  G d de          Z
dS )    )IteratorLiteralOptional)
BaseLoader)Document)get_from_envc            
       p    e Zd ZdZdddddedee         ded         d	ee         fd
Zde	e
         fdZdS )SpiderLoaderzLoad web pages as Documents using Spider AI.

    Must have the Python package `spider-client` installed and a Spider API key.
    See https://spider.cloud for more.
    Nscrape)api_keymodeparamsurlr   r   r   crawlr   c                    |ddd}	 ddl m} n# t          $ r t          d          w xY w|dvrt          d	| d
          |pt	          dd          } ||          | _         || _        || _        || _        dS )a  Initialize with API key and URL.

        Args:
            url: The URL to be processed.
            api_key: The Spider API key. If not specified, will be read from env
            var `SPIDER_API_KEY`.
            mode: The mode to run the loader in. Default is "scrape".
                 Options include "scrape" (single page) and "crawl" (with deeper
                 crawling following subpages).
            params: Additional parameters for the Spider API.
        NmarkdownT)return_formatmetadatar   )SpiderzB`spider` package not found, please run `pip install spider-client`r   zUnrecognized mode 'z%'. Expected one of 'scrape', 'crawl'.r   SPIDER_API_KEY)r   )spiderr   ImportError
ValueErrorr   r   r   r   )selfr   r   r   r   r   s         g/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/spider.py__init__zSpiderLoader.__init__   s    & >!+  F
	%%%%%%% 	 	 	T  	 ***QdQQQ  
 F\)5EFFfW---	s    *returnc              #     K   g }| j         dk    r>| j                            | j        | j                  }|r|                    |           nH| j         dk    r=| j                            | j        | j                  }|r|                    |           |D ]}| j         dk    rM|d                             dd          }|d                             di           }|t          ||	          V  | j         dk    rA|                    dd          }|                    di           }|t          ||	          V  dS )
z+Load documents based on the specified mode.r   )r   r   r   content r   N)page_contentr   )
r   r   
scrape_urlr   r   append	crawl_urlextendgetr   )r   spider_docsresponsedocr"   r   s         r   	lazy_loadzSpiderLoader.lazy_load:   sr     9  {--dht{-KKH -""8,,,Y'!!{,,TXdk,JJH -""8,,, 	 	CyH$$"1vzz)R88 q6::j"55+"xPPPPPPyG##"wwy"55 77:r22+"%1!)     %	 	    )__name__
__module____qualname____doc__strr   r   dictr   r   r   r+    r,   r   r
   r
      s          "&+3!%) ) )) #	)
 '() ) ) ) )V$8H- $ $ $ $ $ $r,   r
   N)typingr   r   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_core.utilsr   r
   r3   r,   r   <module>r8      s    . . . . . . . . . . 6 6 6 6 6 6 - - - - - - - - - - - -V V V V V: V V V V Vr,   