
    Ng                         d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZ ddlmZ ddlmZ  ej        e          Z G d	 d
e          ZdS )z`Load documents from Evernote.

https://gist.github.com/foxmask/7b29c43a161e001ff04afdb2f181e31c
    N)	b64decode)Path)strptime)AnyDictIteratorListOptionalUnion)Document)
BaseLoaderc                      e Zd ZdZddeeef         defdZde	e
         fdZde	e
         fdZed	edefd
            Zededefd            Zeddedee         defd            Zedede	eeef                  fd            ZdS )EverNoteLoadera  Load from `EverNote`.

    Loads an EverNote notebook export file e.g. my_notebook.enex into Documents.
    Instructions on producing this file can be found at
    https://help.evernote.com/hc/en-us/articles/209005557-Export-notes-and-notebooks-as-ENEX-or-HTML

    Currently only the plain text in the note is extracted and stored as the contents
    of the Document, any non content metadata (e.g. 'author', 'created', 'updated' etc.
    but not 'content-raw' or 'resource') tags on the note will be extracted and stored
    as metadata on the Document.

    Args:
        file_path (str): The path to the notebook export with a .enex extension
        load_single_document (bool): Whether or not to concatenate the content of all
            notes into a single long Document.
        If this is set to True (default) then the only metadata on the document will be
            the 'source' which contains the file name of the export.
    T	file_pathload_single_documentc                 <    t          |          | _        || _        dS )zInitialize with file path.N)strr   r   )selfr   r   s      i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/evernote.py__init__zEverNoteLoader.__init__(   s    Y$8!!!    returnc              #      K   |                      | j                  D ]W}|                    d          @t          |d         i d |                                D             d| j        i          V  Xd S )Ncontentc                 "    i | ]\  }}|d v	||S ))r   content-rawresource ).0keyvalues      r   
<dictcomp>z-EverNoteLoader._lazy_load.<locals>.<dictcomp>3   s4        *U"*PPP  PPPr   sourcepage_contentmetadata)_parse_note_xmlr   getr   items)r   notes     r   
_lazy_loadzEverNoteLoader._lazy_load-   s      ((88 	 	Dxx	"".!%i .2jjll   $T^4
 
 
 
 
 
	 	r   c              #      K   | j         s|                                 E d{V  dS t          d                    d |                                 D                       d| j        i          V  dS )z)Load documents from EverNote export file.N c                     g | ]	}|j         
S r   )r%   )r   documents     r   
<listcomp>z,EverNoteLoader.lazy_load.<locals>.<listcomp>C   s    MMMxX*MMMr   r#   r$   )r   r+   r   joinr   )r   s    r   	lazy_loadzEverNoteLoader.lazy_load<   s      ( 	(((((((((((WWMM4??;L;LMMM  #DN3	       r   r   c                     	 dd l }|                     |                                           S # t          $ r}t          d          |d }~ww xY w)Nr   zCould not import `html2text`. Although it is not a required package to use Langchain, using the EverNote loader requires `html2text`. Please install `html2text` via `pip install html2text` and try again.)	html2textstripImportError)r   r4   es      r   _parse_contentzEverNoteLoader._parse_contentH   so    		&&w//55777 	 	 	X  		s   *- 
AAAr   c                     i }| D ]v}|j         dk    rZ|j        rt          |j                  nd||j         <   t          j        ||j                                                            |d<   g|j        ||j         <   w|S )Ndatar   hash)tagtextr   hashlibmd5	hexdigest)r   rsc_dictelems      r   _parse_resourcezEverNoteLoader._parse_resourceU   s    #% 	/ 	/Dx6!!=AY%OYty%9%9%9C"#*;x/A#B#B#L#L#N#N  %)Y""r   Nr*   prefixc                    i }g }dt           dt           ffd| D ]}|j        dk    r2t                              |j                  ||j        <   |j        |d<   ?|j        dk    r.|                    t                              |                     x|j        dk    s|j        dk    rt          |j        d	          ||j        <   |j        d
k    r6t                              ||j                  }|	                    |           |j        ||j        <   t          |          dk    r||d<   fd|                                D             S )Nelement_tagr   c                     | S  d|  S )N.r   )rF   rD   s    r   
add_prefixz.EverNoteLoader._parse_note.<locals>.add_prefixg   s"    ~"",,{,,,r   r   r   r   createdupdatedz%Y%m%dT%H%M%SZznote-attributesr   c                 .    i | ]\  }} |          |S r   r   )r   r    r!   rI   s      r   r"   z.EverNoteLoader._parse_note.<locals>.<dictcomp>   s'    KKK:3

3KKKr   )r   r<   r   r8   r=   appendrC   r   _parse_noteupdatelenr)   )r*   rD   	note_dict	resourcesrB   additional_attributesrI   s    `    @r   rN   zEverNoteLoader._parse_noteb   s   $&			-C 	-C 	- 	- 	- 	- 	- 	-
  	0 	0Dx9$$&4&C&CDI&N&N	$(#+/9	-((Z''  !?!?!E!EFFFFY&&$(i*?*?&.ty:J&K&K	$(##...(6(B(B$() )%   !67777&*i	$(##y>>A$-Ij!KKKK9J9JKKKKr   xml_filec              #     K   	 ddl m} n.# t          $ r!}t                              d           |d}~ww xY w|                    | dddd          }|D ],\  }}|j        d	k    rt                              |          V  -dS )
zParse Evernote xml.r   )etreezCould not import `lxml`. Although it is not a required package to use Langchain, using the EverNote loader requires `lxml`. Please install `lxml` via `pip install lxml` and try again.Nzutf-8FT)encodingstrip_cdata	huge_treerecoverr*   )	lxmlrV   r6   loggererror	iterparser<   r   rN   )rT   rV   r7   contextactionrB   s         r   r'   zEverNoteLoader._parse_note_xml   s      	""""""" 	 	 	LL?  
 G	 //wETSW " 
 
 $ 	7 	7LFDx6!!$0066666	7 	7s    
616)T)N)__name__
__module____qualname____doc__r   r   r   boolr   r   r   r+   r2   staticmethodr8   listdictrC   r	   r
   rN   r   r   r'   r   r   r   r   r      sk        &9 9%T	"2 9$ 9 9 9 9
HX.    
8H- 
 
 
 
 
 
 
 
 
 \
 
$ 
4 
 
 
 \
 L L$ L L L L L \L> 7# 7(4S>*B 7 7 7 \7 7 7r   r   )rd   r>   loggingbase64r   pathlibr   timer   typingr   r   r   r	   r
   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerra   r\   r   r   r   r   <module>rq      s    
                     = = = = = = = = = = = = = = = = - - - - - - @ @ @ @ @ @		8	$	$D7 D7 D7 D7 D7Z D7 D7 D7 D7 D7r   