
    NgO                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ  ej        e          Z G d de          ZdS )    N)Path)AnyDictIteratorPatternUnion)Document)
BaseLoaderc                   0   e Zd ZU dZ ej        dej                  Zee	d<    ej        dej                  Z
ee	d<    ej        d          Zee	d<    ej        dej                  Zee	d	<    ej        d
ej                  Zee	d<    ej        dej                  Zee	d<   	 	 d#deeef         dedefdZdeeef         dej        defdZdedeeef         defdZdedefdZdedefdZdedefdZdedefdZdedefd Z de!e"         fd!Z#d"S )$ObsidianLoaderz%Load `Obsidian` files from directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXz	{{(.*?)}}TEMPLATE_VARIABLE_REGEXz[^\S\/]#([a-zA-Z_]+[-_/\w]*)	TAG_REGEXz^\s*(\w+)::\s*(.*)$DATAVIEW_LINE_REGEXz\[(\w+)::\s*(.*)\]DATAVIEW_INLINE_BRACKET_REGEXz\((\w+)::\s*(.*)\)DATAVIEW_INLINE_PAREN_REGEXUTF-8Tpathencodingcollect_metadatac                 0    || _         || _        || _        dS )a%  Initialize with a path.

        Args:
            path: Path to the directory containing the Obsidian files.
            encoding: Charset encoding, defaults to "UTF-8"
            collect_metadata: Whether to collect metadata from the front matter.
                Defaults to True.
        N)	file_pathr   r   )selfr   r   r   s       i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/obsidian.py__init__zObsidianLoader.__init__   s        0    placeholdersmatchreturnc                 \    dt          |           d}|                    d          ||<   |S )z/Replace a template variable with a placeholder.__TEMPLATE_VAR___   )lengroup)r   r   r   placeholders       r   _replace_template_varz$ObsidianLoader._replace_template_var/   s6     >L(9(9===$)KKNN[!r   objc                    t          |t                    r5|                                D ]\  }}|                    |d| d          } nt          |t                    r4|                                D ]\  }}|                     ||          ||<   nCt          |t                    r.t          |          D ]\  }}|                     ||          ||<   |S )zIRestore template variables replaced with placeholders to original values.z{{z}})
isinstancestritemsreplacedict_restore_template_varslist	enumerate)r   r(   r   r&   valuekeyiitems           r   r/   z%ObsidianLoader._restore_template_vars7   s    c3 	I&2&8&8&:&: C C"Ukk+/Ae/A/A/ABBCT"" 	I!iikk L L
U66ulKKCLT"" 	I$S>> I I444T<HHA
r   contentc                 $   | j         si S | j                            |          }|si S i }t          j        | j        |          }| j                            ||                    d                    }	 t          j
        |          }|                     ||          }d|v r9t          |d         t                    r|d                             d          |d<   |S # t          j        j        $ r t"                              d           i cY S w xY w)zEParse front matter metadata from the content and return it as a dict.r#   tagsz, z Encountered non-yaml frontmatter)r   r   search	functoolspartialr'   r   subr%   yaml	safe_loadr/   r*   r+   splitparserParserErrorloggerwarning)r   r6   r   r   replace_template_varfront_matter_textfront_matters          r   _parse_front_matterz"ObsidianLoader._parse_front_matterD   s)   $ 	I'..w77 	I')(0& 
  
 !8<< %++a..
 
	>*;<<L66|\RRL %%*\&5I3*O*O%'3F';'A'A$'G'GV${& 	 	 	NN=>>>III	s   3A(C 0DDmetadatac                     i }|                                 D ]@\  }}t          |          t          t          t          hv r|||<   .t          |          ||<   A|S )z4Convert a dictionary to a compatible with langchain.)r,   typer+   intfloat)r   rH   resultr3   r2   s        r   !_to_langchain_compatible_metadataz0ObsidianLoader._to_langchain_compatible_metadatab   s]    "..** 	) 	)JCE{{sC///#s!%jjsr   c                     | j         st                      S | j                            |          }|st                      S d |D             S )z0Return a set of all tags in within the document.c                     h | ]}|S  rQ   ).0tags     r   	<setcomp>z6ObsidianLoader._parse_document_tags.<locals>.<setcomp>u   s    %%%%%%r   )r   setr   findall)r   r6   r   s      r   _parse_document_tagsz#ObsidianLoader._parse_document_tagsl   sP    $ 	55L&&w// 	55L%%u%%%%r   c                     | j         si S i d | j                            |          D             d | j                            |          D             d | j                            |          D             S )zWParse obsidian dataview plugin fields from the content and return it
        as a dict.c                 ,    i | ]}|d          |d         S r   r#   rQ   rR   r   s     r   
<dictcomp>z9ObsidianLoader._parse_dataview_fields.<locals>.<dictcomp>~   2        a%(  r   c                 ,    i | ]}|d          |d         S rZ   rQ   r[   s     r   r\   z9ObsidianLoader._parse_dataview_fields.<locals>.<dictcomp>   r]   r   c                 ,    i | ]}|d          |d         S rZ   rQ   r[   s     r   r\   z9ObsidianLoader._parse_dataview_fields.<locals>.<dictcomp>   r]   r   )r   r   rV   r   r   r   r6   s     r   _parse_dataview_fieldsz%ObsidianLoader._parse_dataview_fieldsw   s     $ 	I
 !5==gFF  

 !=EEgNN  
 !?GGPP  
 	
r   c                 J    | j         s|S | j                            d|          S )z4Remove front matter metadata from the given content. )r   r   r<   r`   s     r   _remove_front_matterz#ObsidianLoader._remove_front_matter   s+    $ 	N&**2w777r   c           
   #     K   t          t          | j                                      d                    }|D ]}t	          || j                  5 }|                                }d d d            n# 1 swxY w Y   |                     |          }|                     |          }| 	                    |          }| 
                    |          }t          |j                  t          |          |                                j        |                                j        |                                j        d|                     |          |}|s|                    d          r>d                    |t)          |                    dg           pg           z            |d<   t+          ||          V  d S )Nz**/*.md)r   )sourcer   createdlast_modifiedlast_accessedr8   ,)page_contentrH   )r0   r   r   globopenr   readrG   rW   ra   rd   r+   namestatst_ctimest_mtimest_atimerN   getjoinrU   r	   )	r   pathsr   ftextrF   r8   dataview_fieldsrH   s	            r   	lazy_loadzObsidianLoader.lazy_load   s     T$.))..y99:: 	A 	ADdT]333  qvvxx                               33D99L,,T22D"99$??O,,T22Ddi..D		99;;/!%!5!%!5  88FF "H  |''// #&883|//;;ArBBB$ $  x@@@@@@@/	A 	As   A33A7	:A7	N)r   T)$__name__
__module____qualname____doc__recompileDOTALLr   r   __annotations__r   r   	MULTILINEr   r   r   r   r+   r   boolr   r   Matchr'   r   r/   r.   rG   rN   rU   rW   ra   rd   r   r	   rz   rQ   r   r   r   r      sw        //","*-BBI"N"NNNN'1rz,	'J'JWJJJ#$CDDIwDDD#-2:.Dbl#S#SSSS-7RZr|. .!7    ,62:r|, ,     !%	1 1CI1 1 	1 1 1 1$ cN358	   # T#s(^ PS    3 4    <$ 4    	&C 	&C 	& 	& 	& 	&
c 
d 
 
 
 
*8C 8C 8 8 8 8A8H- A A A A A Ar   r   )r:   loggingr   pathlibr   typingr   r   r   r   r   r=   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   	getLoggerr{   rB   r   rQ   r   r   <module>r      s         				       6 6 6 6 6 6 6 6 6 6 6 6 6 6  - - - - - - @ @ @ @ @ @		8	$	$\A \A \A \A \AZ \A \A \A \A \Ar   