
    Ng                         d Z ddlZddlmZmZmZmZmZ erddlm	Z	m
Z
 ddlmZ ddlmZ  ej        e          Z G d de          ZdS )	zRLoader that uses Selenium to load a page, then uses unstructured to load the html.    N)TYPE_CHECKINGListLiteralOptionalUnionChromeFirefox)Document)
BaseLoaderc                       e Zd ZdZdddddg fdee         deded         d	ee         d
ee         dedee         fdZ	de
d         fdZdede
d         defdZdee         fdZdS )SeleniumURLLoadera  Load `HTML` pages with `Selenium` and parse with `Unstructured`.

    This is useful for loading pages that require javascript to render.

    Attributes:
        urls (List[str]): List of URLs to load.
        continue_on_failure (bool): If True, continue loading other URLs on failure.
        browser (str): The browser to use, either 'chrome' or 'firefox'.
        binary_location (Optional[str]): The location of the browser binary.
        executable_path (Optional[str]): The path to the browser executable.
        headless (bool): If True, the browser will run in headless mode.
        arguments [List[str]]: List of arguments to pass to the browser.
    TchromeNurlscontinue_on_failurebrowser)r   firefoxbinary_locationexecutable_pathheadless	argumentsc                     	 ddl }n# t          $ r t          d          w xY w	 ddl}	n# t          $ r t          d          w xY w|| _        || _        || _        || _        || _        || _        || _	        dS )z4Load a list of URLs using Selenium and unstructured.r   NzIselenium package not found, please install it with `pip install selenium`zQunstructured package not found, please install it with `pip install unstructured`)
seleniumImportErrorunstructuredr   r   r   r   r   r   r   )
selfr   r   r   r   r   r   r   r   r   s
             m/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/url_selenium.py__init__zSeleniumURLLoader.__init__   s    	OOOO 	 	 	)  		 	 	 	-  	 	#6 .. "s    !* Areturnr   c                 "   | j                                         dk    rddlm} ddlm} ddlm}  |            }| j        D ]}|	                    |           | j
        r*|	                    d           |	                    d           | j        | j        |_        | j         ||	          S  || || j        
                    S | j                                         dk    rddlm} ddlm} ddlm}  |            }| j        D ]}|	                    |           | j
        r|	                    d           | j        | j        |_        | j         ||	          S  || || j        
                    S t!          d          )a  Create and return a WebDriver instance based on the specified browser.

        Raises:
            ValueError: If an invalid browser is specified.

        Returns:
            Union[Chrome, Firefox]: A WebDriver instance for the specified browser.
        r   r   )r	   )Options)Servicez
--headlessz--no-sandboxN)options)r   )r#   servicer   )r
   z5Invalid browser specified. Use 'chrome' or 'firefox'.)r   lowerselenium.webdriverr	   !selenium.webdriver.chrome.optionsr!   !selenium.webdriver.chrome.servicer"   r   add_argumentr   r   r   r
   "selenium.webdriver.firefox.options"selenium.webdriver.firefox.service
ValueError)	r   r	   ChromeOptionsr"   chrome_optionsargr
   FirefoxOptionsfirefox_optionss	            r   _get_driverzSeleniumURLLoader._get_driverB   s+    <8++111111RRRRRRAAAAAA*]__N~ 1 1++C0000} <++L999++N;;;#/151E.#+vn55556&0DEEE    \!!Y..222222TTTTTTBBBBBB,n..O~ 2 2,,S1111} ;,,\:::#/262F/#+w77777'0DEEE   
 TUUU    urldriverc                 Z   ddl m} ddlm} 	 |dddd}|j        x}r||d<   	 |                    |j        d	          x}r|                    d
          pd|d<   n# |$ r Y nw xY w	 |                    |j        d          x}r|                    d          pd|d<   n# |$ r Y nw xY w|S )Nr   )NoSuchElementException)ByzNo title found.zNo description found.zNo language found.)sourcetitledescriptionlanguager:   z//meta[@name="description"]contentr;   htmllangr<   )	selenium.common.exceptionsr7   selenium.webdriver.common.byr8   r:   find_elementXPATHget_attributeTAG_NAME)	r   r4   r5   r7   r8   metadatar:   r;   html_tags	            r   _build_metadataz!SeleniumURLLoader._build_metadataw   s;   EEEEEE333333A&2,	
 
 L 5 	& %HW	$117  {   --i88S<S ' & 	 	 	D		!..r{FCCCx **622J6J $ & 	 	 	D	s#   7A A$#A$(7B   B('B(c                    ddl m} t                      }|                                 }| j        D ]}	 |                    |           |j        } ||          }d                    d |D                       }|                     ||          }|	                    t          ||                     # t          $ r4}	| j        r!t                              d| d|	            n|	Y d	}	~	d	}	~	ww xY w|                                 |S )
zLoad the specified URLs using Selenium and create Document instances.

        Returns:
            List[Document]: A list of Document instances with loaded content.
        r   )partition_html)textz

c                 ,    g | ]}t          |          S  )str).0els     r   
<listcomp>z*SeleniumURLLoader.load.<locals>.<listcomp>   s    #?#?#?CGG#?#?#?r3   )page_contentrF   zError fetching or processing z, exception: N)unstructured.partition.htmlrJ   listr2   r   getpage_sourcejoinrH   appendr   	Exceptionr   loggererrorquit)
r   rJ   docsr5   r4   rR   elementsrK   rF   es
             r   loadzSeleniumURLLoader.load   sB    	?>>>>>#vv!!##9 	 	C

3%1)>|<<<{{#?#?h#?#?#?@@//V<<H$JJJKKKK   + LL!V!V!VST!V!VWWWWG XWWWW 	s   BB55
C3?*C..C3)__name__
__module____qualname____doc__r   rN   boolr   r   r   r   r2   dictrH   r   r`   rM   r3   r   r   r      s        " %)08)-)-!!# !#3i!# "!# ,-	!#
 "#!# "#!# !# 9!# !# !# !#F3VU#67 3V 3V 3V 3Vj3 6I0J t    >d8n      r3   r   )rd   loggingtypingr   r   r   r   r   r&   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerra   rZ   r   rM   r3   r   <module>rl      s    X X  @ @ @ @ @ @ @ @ @ @ @ @ @ @ 322222222 - - - - - - @ @ @ @ @ @		8	$	$` ` ` ` `
 ` ` ` ` `r3   