
    Ng                         d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ  e j        e          Z G d de
          ZdS )    N)AnyIteratorListOptionalSequence)Document)
BaseLoader)NewsURLLoaderc                       e Zd ZdZ	 	 	 	 ddeee                  dee         deded	ed
dfdZ	d
e
e         fdZed
ee         fd            Zd
ee         fdZdS )RSSFeedLoaderaQ  Load news articles from `RSS` feeds using `Unstructured`.

    Args:
        urls: URLs for RSS feeds to load. Each articles in the feed is loaded into its own document.
        opml: OPML file to load feed urls from. Only one of urls or opml should be provided.  The value
        can be a URL string, or OPML markup contents as byte or string.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newsloader_kwargs: Any additional named arguments to pass to
            NewsURLLoader.

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import RSSFeedLoader

            loader = RSSFeedLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    The loader uses feedparser to parse RSS feeds.  The feedparser library is not installed by default so you should
    install it if using this loader:
    https://pythonhosted.org/feedparser/

    If you use OPML, you should also install listparser:
    https://pythonhosted.org/listparser/

    Finally, newspaper is used to process each article:
    https://newspaper.readthedocs.io/en/latest/
    NTFurlsopmlcontinue_on_failureshow_progress_barnewsloader_kwargsreturnc                 ~    |du |du k    rt          d          || _        || _        || _        || _        || _        dS )zInitialize with urls or OPML.Nz;Provide either the urls or the opml argument, but not both.)
ValueErrorr   r   r   r   r   )selfr   r   r   r   r   s         d/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/rss.py__init__zRSSFeedLoader.__init__/   sa     DLDL
 
 M   		#6 !2!2    c                     |                                  }| j        r5	 ddlm} n"# t          $ r}t          d          |d }~ww xY w ||          }t	          |          S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   ImportErrorlist)r   iterr   es       r   loadzRSSFeedLoader.loadD   s    ~~! 		%%%%%%%   !/  	 4::DDzzs   $ 
A>Ac                     | j         r| j         S 	 dd l}n"# t          $ r}t          d          |d }~ww xY w|                    | j                  }d |j        D             S )Nr   zPackage listparser must be installed if the opml arg is used. Please install with 'pip install listparser' or use the urls arg instead.c                     g | ]	}|j         
S  )url).0feeds     r   
<listcomp>z+RSSFeedLoader._get_urls.<locals>.<listcomp>_   s    ///T///r   )r   
listparserr   parser   feeds)r   r(   r   rsss       r   	_get_urlszRSSFeedLoader._get_urlsR   s    9 	9	 	 	 	$  		 ty))//SY////s    
4/4c              #     K   	 dd l }n# t          $ r t          d          w xY w| j        D ]}	 |                    |          }t	          |dd          rt          d| d|j                   n@# t          $ r3}| j        r%t          
                    d| d|            Y d }~{|d }~ww xY w	 |j        D ]C}t          d
d|j        gi| j        }|                                d         }||j        d<   |V  D# t          $ r9}| j        r+t          
                    d	|j         d|            Y d }~|d }~ww xY wd S )Nr   zMfeedparser package not found, please install it with `pip install feedparser`bozoFzError fetching z, exception: r   r&   zError processing entry r#   )
feedparserr   r,   r)   getattrr   bozo_exception	Exceptionr   loggererrorentriesr
   linkr   r    metadata)r   r/   r$   r&   r   entryloaderarticles           r   r   zRSSFeedLoader.lazy_loada   s     	 	 	 	+  	 > 	 	C!'',,4// $Q#QQD<OQQ      + LL!H3!H!HQ!H!HIIIHHHHG!\ " "E*  #j\0 F %kkmmA.G/2G$V,!MMMM"    + LL!W5:!W!WTU!W!WXXXHHHHG-	 	sE   	 #A A22
B/<'B*(B**B/3AC??
E	,D=;D==E)NNTF)__name__
__module____qualname____doc__r   r   strboolr   r   r   r   r    propertyr,   r   r   r#   r   r   r   r      s           H )-"$("'3 3x}%3 sm3 "	3
  3 !3 
3 3 3 3*d8n     08C= 0 0 0 X0$8H- $ $ $ $ $ $r   r   )loggingtypingr   r   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser	   )langchain_community.document_loaders.newsr
   	getLoggerr;   r3   r   r#   r   r   <module>rH      s     : : : : : : : : : : : : : : - - - - - - @ @ @ @ @ @ C C C C C C		8	$	$y y y y yJ y y y y yr   