
    Ng                     J    d dl mZmZ d dlmZ d dlmZ  G d de          ZdS )    )AnyList)Document)WebBaseLoaderc                   b    e Zd ZdZdee         fdZdedee         fdZdedee         fdZ	dS )	HNLoaderz_Load `Hacker News` data.

    It loads data from either main page results or the comments page.returnc                     |                                  }d| j        v r|                     |          S |                     |          S )a  Get important HN webpage information.

        HN webpage components are:
            - title
            - content
            - source url,
            - time of post
            - author of the post
            - number of comments
            - rank of the post
        item)scrapeweb_pathload_commentsload_results)self	soup_infos     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/hn.pyloadzHNLoader.load   sF     KKMM	T]""%%i000$$Y///    r   c                      |                     d          }|                    d                              d           fd|D             S )zLoad comments from a HN post.ztr[class='athing comtr']ztr[id='pagespace']titlec                 p    g | ]2}t          |j                                        j        d           3S ))sourcer   )page_contentmetadata)r   textstripr   ).0commentr   r   s     r   
<listcomp>z*HNLoader.load_comments.<locals>.<listcomp>#   sX     
 
 

 	 $\//11$(MEBB  
 
 
r   )select
select_oneget)r   r   commentsr   s   `  @r   r   zHNLoader.load_comments   sl    ##$>??$$%9::>>wGG
 
 
 
 

 $
 
 
 	
r   soupc           	         |                     d          }g }|D ]}|                    d          j        }|                    dddi                              d                              d          }|                    dddi          j                                        }| j        |||d}|                    t          ||||	                     |S )
zLoad items from an HN page.ztr[class='athing']zspan[class='rank']spanclass	titlelineahref)r   r   linkranking)r   r+   r,   r   )	r    r!   r   findr"   r   r   appendr   )	r   r$   items	documentslineItemr,   r+   r   r   s	            r   r   zHNLoader.load_results+   s    011	 	 	H))*>??DG==';)?@@EEcJJNNvVVDMM&7K*@AAFLLNNE-"	 H !&T7X     
 r   N)
__name__
__module____qualname____doc__r   r   r   r   r   r    r   r   r   r      s        I I0d8n 0 0 0 0$

s 

tH~ 

 

 

 

 h      r   r   N)typingr   r   langchain_core.documentsr   -langchain_community.document_loaders.web_baser   r   r6   r   r   <module>r:      su            - - - - - - G G G G G G6 6 6 6 6} 6 6 6 6 6r   