
    Ng                         d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ  G d d	ee          ZdS )
    N)ABC)Path)IteratorListSetTuple)Document)BaseBlobParser)Blobc                       e Zd ZdZdedee         fdZdedee         fdZde	j
        dedeeeeef                  fdZd	ede	j
        d
ee         dee         dee         f
dZdS )
VsdxParserzParser for vsdx files.blobreturnc                 ,    |                      |          S )zParse a vsdx file.)
lazy_parse)selfr   s     m/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/parsers/vsdx.pyparsezVsdxParser.parse   s    t$$$    c              #     K                                    5 }t          j        |d          5 }|                     |j                  }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   fd|D             E d{V  dS )zoRetrieve the contents of pages from a .vsdx file
        and insert them into documents, one document per page.rNc           	      L    g | ] \  }}}t          |j        ||d           !S ))sourcepage	page_name)page_contentmetadata)r	   r   ).0page_numberr   r   r   s       r   
<listcomp>z)VsdxParser.lazy_parse.<locals>.<listcomp>   sX     

 

 

 5Y )"k'!*   

 

 

r   )as_bytes_iozipfileZipFileget_pages_contentr   )r   r   pdf_file_objzfilepagess    `   r   r   zVsdxParser.lazy_parse   s]       	C<s33 Cu..udkBBC C C C C C C C C C C C C C C	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C

 

 

 

 9>

 

 

 
	
 
	
 
	
 
	
 
	
 
	
 
	
 
	
 
	
s4   A-A
A-A	A-A	A--A14A1r&   r   c                 n   	 ddl n# t          $ r t          d          w xY wd                                vr$t          d                    |                     dS d                                vr$t          d                    |                     dS d                                vr$t          d	                    |                     dS                                         d                    }                                        d                    }                                        d                    }t          |d
         d         t                    rd |d
         d         D             n'|d
         d         d         	                                gt          |d         d         t                    rd |d         d         D             nd|d         d         d         z   g|d         d         d         d         dt                             }d |D             }fd|D             }g }|D ]̊                              }	t          j                            |	                    }
t          j        d|
          }t          |          dk    rfd                    |          }ddddddd }|                                D ]\  }}|                    ||          }|                    |d!           ͈fd"|D             }g }t'          t)          ||                    D ]h\  }\  }|                     ||          d                    fd#|D             fd$|D             z             }|                    |||f           i|S )%a  Get the content of the pages of a vsdx file.

        Attributes:
            zfile (zipfile.ZipFile): The vsdx file under zip format.
            source (str): The path of the vsdx file.

        Returns:
            list[tuple[int, str, str]]: A list of tuples containing the page number,
            the name of the page and the content of the page
            for each page of the vsdx file.
        r   NzfThe xmltodict library is required to parse vsdx files. Please install it with `pip install xmltodict`.zvisio/pages/pages.xmlz'WARNING - No pages.xml file found in {}z visio/pages/_rels/pages.xml.relsz,WARNING - No pages.xml.rels file found in {}zdocProps/app.xmlz%WARNING - No app.xml file found in {}PagesPagec                 B    g | ]}|d                                           S )@Namestripr   rels     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>P   s5     + + +),G""$$+ + +r   r,   RelationshipsRelationshipc                 $    g | ]}d |d         z   S )visio/pages/@Target r/   s     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>X   s1     + + + Y/+ + +r   r4   r5   
PropertiesTitlesOfPartsz	vt:vectorzvt:lpstrc                 6    g | ]}|                                 S r6   r-   )r   names     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>d   s     @@@$@@@r   c                 j    g | ]/}                     |                                                   0S r6   )indexr.   )r   r:   disordered_namesdisordered_pathss     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>e   sE     
 
 
 -33DJJLLAAB
 
 
r   z("#text"\s*:\s*"([^\\"]*(?:\\.[^\\"]*)*)"
	-'   é   ô)z\nz\tz\u2013z\u2019z\u00e9rz\u00f4mer   r   c                     g | ]o}d t          |          j         d                                v .|                                        d t          |          j         d                    dpS )zvisio/pages/_rels/z	.xml.rels)pathcontent)r   stemnamelistr   read)r   	page_path	xmltodictr&   s     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>   s     	
 	
 	
 CDOO$8CCCu~~GWGWWW "$??JJSDOO4HSSSTT   XWWr   c                 4    g | ]}|d          v |d         S rE   r6   )r   page_relationshipss     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>   s7       V}55 .)555r   c                 8    g | ]}|d          k    |d         S rE   r6   )r   rO   rG   s     r   r    z0VsdxParser.get_pages_content.<locals>.<listcomp>   s7       V},, .),,,r   )rM   ImportErrorrJ   printformatr   rK   
isinstancelistr.   lenjsondumpsrefindalljoinitemsreplaceappend	enumeratezipget_relationships)r   r&   r   pagesxml_contentappxml_contentpagesxmlrels_contentordered_namesordered_pathsdisordered_pagesrH   string_contentsamplesr   map_symboleskeyvaluepagexml_relsordered_pagesr   r   r=   r>   rG   rP   rM   s    `                  @@@@@r   r$   zVsdxParser.get_pages_content(   s   	 	 	 	B  	 #%..*:*:::;BB6JJKKKF-U^^5E5EEE@GGOOPPPFU^^%5%5559@@HHIIIF!*<S1T1T!U!U(uzz:L/M/MNN%.__JJ9::&
 &
 &w/7>> 	+ +0@0I&0Q+ + +
 !)&1':@@BB+ *?;NKTRR 		+ +/@P+ + + &7G	RS+ $2,#?#P$

$-,---$/ A@-@@@
 
 
 
 
%
 
 
 ! 	V 	VDjj&&G!Z	(@(@AANj;^ G 7||a#yy11"" $!%    #/"4"4"6"6 D DJC#/#7#7U#C#CLL ''|(T(TUUU	
 	
 	
 	
 	
 +	
 	
 	
 57.7}--/
 /
 	I 	I*K*$	 !22e]L M  99   !1  
   !1  	 L   +y,!GHHHHs    'r   filelistrn   c                 >   t                    j        }t                    j        d| dz  }t          |          |                                vrt                      S t          fd|D                       }t          |d         d         t                    rd |d         d         D             }n|d         d         d         g}t          fd|D                       	                    |          }	|	D ]}
|	| 
                    |
|||          z  }	|	S )	a  Get the relationships of a page and the relationships of its relationships,
        etc... recursively.
        Pages are based on other pages (ex: background page),
        so we need to get all the relationships to get all the content of a single page.
        z_rels/z.relsc              3   @   K   | ]}|d          k    |d         V  dS )rG   rH   Nr6   )r   rO   r   s     r   	<genexpr>z/VsdxParser.get_relationships.<locals>.<genexpr>   s?       $
 $
!&%-4:O:OE):O:O:O:O$
 $
r   r1   r2   c                     g | ]
}|d          S )r5   r6   r/   s     r   r    z0VsdxParser.get_relationships.<locals>.<listcomp>   s,        I  r   r5   c                 4    g | ]}t          |z            S r6   )str)r   targetparent_paths     r   r    z0VsdxParser.get_relationships.<locals>.<listcomp>   s&    ===6Sv%&&===r   )r   r:   parentrv   rJ   setnextrU   rV   intersectionrb   )r   r   r&   rp   rn   	name_path	rels_pathpagexml_rels_contenttargetsrP   r0   rx   s    `         @r   rb   zVsdxParser.get_relationships   sj    JJO	4jj'";9";";";;	y>>!1!11155L# $
 $
 $
 $
*6$
 $
 $
  
  
 *?;NKTRR 	Y /@P  GG
 ,O<^LYWXG====W===
 

,x
 
  	 ! 	 	C)D,B,BUHl- - MM r   N)__name__
__module____qualname____doc__r   r   r	   r   r   r"   r#   rv   r   r   intr$   dictr   rb   r6   r   r   r   r      s          %$ %8H#5 % % % %
t 
(: 
 
 
 
(|_|.1|	eCcM"	#| | | ||)) ) s)	)
 4j) 
S) ) ) ) ) )r   r   )rX   rZ   r"   abcr   pathlibr   typingr   r   r   r   %langchain_community.docstore.documentr	   )langchain_community.document_loaders.baser
   1langchain_community.document_loaders.blob_loadersr   r   r6   r   r   <module>r      s     				              - - - - - - - - - - - - : : : : : : D D D D D D B B B B B BB B B B B B B B B Br   