
    Ng                     z    d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	  ej
        e          Z G d de	          ZdS )z1Loader that uses unstructured to load HTML files.    N)AnyIteratorList)Document)
BaseLoaderc                   |    e Zd ZdZ	 	 	 	 ddee         dedededed	ed
dfdZd
ee	         fdZ
d
ee	         fdZdS )NewsURLLoadera/  Load news articles from URLs using `Unstructured`.

    Args:
        urls: URLs to load. Each is loaded into its own document.
        text_mode: If True, extract text from URL and use that for page content.
            Otherwise, extract raw HTML.
        nlp: If True, perform NLP on the extracted contents, like providing a summary
            and extracting keywords.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newspaper_kwargs: Any additional named arguments to pass to
            newspaper.Article().

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import NewsURLLoader

            loader = NewsURLLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    Newspaper reference:
        https://newspaper.readthedocs.io/en/latest/
    TFurls	text_modenlpcontinue_on_failureshow_progress_barnewspaper_kwargsreturnNc                     	 ddl }|j        | _        n# t          $ r t          d          w xY w|| _        || _        || _        || _        || _        || _	        dS )zInitialize with file path.r   NzMnewspaper package not found, please install it with `pip install newspaper3k`)
	newspaper__version___NewsURLLoader__versionImportErrorr
   r   r   r   r   r   )selfr
   r   r   r   r   r   r   s           e/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/news.py__init__zNewsURLLoader.__init__+   s    	&2DNN 	 	 	,  	 	"#6  0!2s    -c                     |                                  }| j        r5	 ddlm} n"# t          $ r}t          d          |d }~ww xY w ||          }t	          |          S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   r   list)r   iterr   es       r   loadzNewsURLLoader.loadF   s    ~~! 		%%%%%%%   !/  	 4::DDzzs   $ 
A>Ac              #   0  K   	 ddl m} n"# t          $ r}t          d          |d }~ww xY w| j        D ]`}	  ||fi | j        }|                                 |                                 | j        r|                                 n@# t          $ r3}| j	        r%t                              d| d|            Y d }~|d }~ww xY wt          |dd          t          |dt          |d	d                    t          |d
g           t          |dd          t          |dd          t          |dd          d}| j        r|j        }n|j        }| j        r(t          |dg           |d<   t          |dd          |d<   t!          ||          V  bd S )Nr   )ArticlezFCannot import newspaper, please install with `pip install newspaper3k`zError fetching or processing z, exception: title urlcanonical_linkauthors	meta_langmeta_descriptionpublish_date)r"   linkr&   languagedescriptionr)   keywordssummary)page_contentmetadata)r   r!   r   r
   r   downloadparser   	Exceptionr   loggererrorgetattrr   texthtmlr   )r   r!   r   r$   articler0   contents          r   r   zNewsURLLoader.lazy_loadT   s     	))))))) 	 	 	X 	
 9 "	D "	DC!'#??)>??  """8 "KKMMM   + LL!V!V!VST!V!VWWWHHHHG !'266AQSU0V0VWW"7Ir::#G["==&w0BBGG ' D D H ~ '!,!,x F'.w
B'G'G$&-gy"&E&E#(CCCCCCCE"	D "	Ds.    
*%*AB


C'C CC)TFTF)__name__
__module____qualname____doc__r   strboolr   r   r   r   r   r        r   r	   r	      s         @ $("'3 33i3 3 	3
 "3  3  3 
3 3 3 36d8n    *D8H- *D *D *D *D *D *DrB   r	   )r>   loggingtypingr   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerr;   r4   r	   rA   rB   r   <module>rH      s    7 7  & & & & & & & & & & - - - - - - @ @ @ @ @ @		8	$	$qD qD qD qD qDJ qD qD qD qD qDrB   