
    Ng                     z    d dl Z d dlmZmZ d dlZd dlmZ d dlmZ  e j	        e
          Z G d de          ZdS )    N)AnyList)Document)
BaseLoaderc                   n    e Zd ZdZ	 ddedee         defdZdedefd	Zd
ede	fdZ
dee         fdZdS )DiffbotLoaderzLoad `Diffbot` json file.T	api_tokenurlscontinue_on_failurec                 0    || _         || _        || _        dS )a	  Initialize with API token, ids, and key.

        Args:
            api_token: Diffbot API token.
            urls: List of URLs to load.
            continue_on_failure: Whether to continue loading other URLs if one fails.
               Defaults to True.
        N)r	   r
   r   )selfr	   r
   r   s       h/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/diffbot.py__init__zDiffbotLoader.__init__   s      #	#6       diffbot_apireturnc                     d| S )Nzhttps://api.diffbot.com/v3/ )r   r   s     r   _diffbot_api_urlzDiffbotLoader._diffbot_api_url   s    :[:::r   urlc                     |                      d          }| j        |d}t          j        ||d          }|j        r|                                ni S )z'Get Diffbot file from Diffbot REST API.article)tokenr   
   )paramstimeout)r   r	   requestsgetokjson)r   r   diffbot_urlr   responses        r   _get_diffbot_datazDiffbotLoader._get_diffbot_data!   s^     ++I66^
 
 <FBGGG #++5x}}25r   c                 h   t                      }| j        D ]}	 |                     |          }d|v r|d         d         d         nd}d|i}|                    t	          ||                     [# t
          $ r4}| j        r!t                              d| d|            n|Y d	}~d	}~ww xY w|S )
z>Extract text from Diffbot on all the URLs and return Documentsobjectsr   text source)page_contentmetadatazError fetching or processing z, exception: N)	listr
   r#   appendr   	Exceptionr   loggererror)r   docsr   datar&   r*   es          r   loadzDiffbotLoader.load.   s    #vv9 
	 
	C	--c225>$5F5FtIq)&11B$c?H$JJJKKKK   + LL!V!V!VST!V!VWWWWG XWWWW
 s   AA11
B/;*B**B/N)T)__name__
__module____qualname____doc__strr   boolr   r   r   r#   r   r3   r   r   r   r   r      s        ## LP7 77$(I7DH7 7 7 7;C ;C ; ; ; ;6S 6S 6 6 6 6d8n      r   r   )loggingtypingr   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerr4   r.   r   r   r   r   <module>r?      s              - - - - - - @ @ @ @ @ @		8	$	$1 1 1 1 1J 1 1 1 1 1r   