
    Ng	                         d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ  ej        e          Z G d de          ZdS )	zScrapfly Web Reader.    N)IteratorListLiteralOptional)
BaseLoader)Document)get_from_envc                       e Zd ZdZddddddee         dee         ded	         d
ee         de	ddfdZ
dee         fdZdS )ScrapflyLoaderzTurn a url to llm accessible markdown with `Scrapfly.io`.

    For further details, visit: https://scrapfly.io/docs/sdk/python
    NmarkdownT)api_keyscrape_formatscrape_configcontinue_on_failureurlsr   r   )r   textr   r   returnc                    	 ddl m} n# t          $ r t          d          w xY w|st          d          |pt	          dd          } ||          | _         || _        || _        || _        || _        dS )	a  Initialize client.

        Args:
            urls: List of urls to scrape.
            api_key: The Scrapfly API key. If not specified must have env var
                SCRAPFLY_API_KEY set.
            scrape_format: Scrape result format, one or "markdown" or "text".
            scrape_config: Dictionary of ScrapFly scrape config object.
            continue_on_failure: Whether to continue if scraping a url fails.
        r   )ScrapflyClientzC`scrapfly` package not found, please run `pip install scrapfly-sdk`zURLs must be provided.r   SCRAPFLY_API_KEY)keyN)	scrapflyr   ImportError
ValueErrorr	   r   r   r   r   )selfr   r   r   r   r   r   s          i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/scrapfly.py__init__zScrapflyLoader.__init__   s    &	/////// 	 	 	U  	  	75666H\)5GHH&7333	**#6   s   	 #c              #   ^  K   ddl m} | j        | j        ni }| j        D ]}	 | j                              ||fd| j        i|          }t          |j        d         d|i          V  L# t          $ r4}| j	        r!t                              d| d|            n|Y d }~d }~ww xY wd S )	Nr   )ScrapeConfigformatcontenturl)page_contentmetadatazError fetching data from z, exception: )r   r   r   r   scraper   r   scrape_result	Exceptionr   loggererror)r   r   r   r"   responsees         r   	lazy_loadzScrapflyLoader.lazy_load5   s%     )))))).2.@.L**RT9 	 	C=// LQQT-?Q=QQ  !)!7	!B#S\          + LL!RS!R!Rq!R!RSSSSG TSSSS	 	s   AA,,
B*6*B%%B*)__name__
__module____qualname____doc__r   strr   r   dictboolr   r   r   r,        r   r   r      s          "&5?(,$( 7  7  73i 7 #	 7
 12 7  ~ 7 " 7 
 7  7  7  7D8H-      r5   r   )r0   loggingtypingr   r   r   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_core.utilsr	   	getLogger__file__r(   r   r4   r5   r   <module>r=      s       4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 - - - - - - - - - - - -		8	$	$9 9 9 9 9Z 9 9 9 9 9r5   