
    Ng@              	      $   U d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ dZded<    eej                   e	 d$ddddddd%d                        Z!d&d!Z"d'd#Z#dS )(    )annotationsN)BytesIO)IOAnyIteratorcast)etree)add_chunking_strategy)ElementElementMetadataText)read_txt_file)FileType)exactly_onespooled_to_bytes_io_if_needed)apply_metadataget_last_modified_date)element_from_textxmlstrDETECTION_ORIGINF)filetextencodingxml_keep_tagsxml_pathfilename
str | Noner   IO[bytes] | Noner   r   r   boolr   kwargsr   returnlist[Element]c                  t          | ||           g }t          | | rt          |           nd          }t          |_        |rZ| rt          | |          d         }	n-|r%t          t          |          |          d         }	n|J |}	t          |	|          g}nWt          | |||          }
|
D ]A}|r=t          |          }t          j        |          |_        |                    |           B|S )	ap  Partitions an XML document into its document elements.

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    text
        The text of the XML file.
    encoding
        The encoding method used to decode the text input. If None, utf-8 will be used.
    xml_keep_tags
        If True, will retain the XML tags in the output. Otherwise it will simply extract
        the text from within the tags.
    xml_path
        The xml_path to use for extracting the text. Only used if xml_keep_tags=False.
    r   r   r   N)r   last_modified)r   r      )r   r   )r   metadata)r   r   r   r   )r   r   r   r   detection_originr   r   r   get_leaf_elementsr   copydeepcopyr(   append)r   r   r   r   r   r   r!   elementsr(   raw_textleaf_elementsleaf_elementelements                V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/xml.pypartition_xmlr4      sD   < 48888 HX)_)?)I)I)I[_  H !1H ) 	$hJJJ1MHH 	$*G*M*MX`aaabcdHH###Hh:::; *	
 
 
 * 	) 	)L )+L99#'=#:#: (((O    Iterator[str | None]c                   t          | ||           | rt          | |          S |rt          t          |          |          S t          t	          t          t          |          d                    }t          ||          S )zGGet leaf elements from the XML tree defined in filename, file, or text.r%   )r   )r   r   zutf-8)r   )r   _get_leaf_elementsr   r   bytesr   r   )r   r   r   r   bs        r3   r*   r*   Y   s     48888 8!(X>>>>	 8!'DT'J'JU]^^^^E$sD//G<<<==!!h7777r5   str | IO[bytes]c              #    K   g }t          j        | dd          }|;t          |          \  }}t          j        |          }d  ||          D             }|D ]\  }}|dk    r|                    |           |dk    r=|j        "|j                                        r	|j        V  |                                 |rJ|d                                         0|	                                 |r|d                                         0dS )	z<Parse the XML tree in a memory efficient manner if possible.)startendF)eventsresolve_entitiesNc              3     K   | ]}d |fV  	dS )r>   N ).0els     r3   	<genexpr>z%_get_leaf_elements.<locals>.<genexpr>u   s&      IIBUBKIIIIIIr5   r=   r>   )
r	   	iterparsenextXPathr-   r   stripclear	getparentpop)r   r   element_stackelement_iterator_r2   compiled_pathevents           r3   r8   r8   g   s9     
 +-Mt4DW\]]] *++
7H--II--2H2HIII*    wG  )))E>>|'GL,>,>,@,@'l"""MMOOO 	 b 1 ; ; = = E  	 b 1 ; ; = = E   r5   )N)r   r   r   r   r   r   r   r   r   r    r   r   r!   r   r"   r#   )
r   r   r   r   r   r   r   r   r"   r6   )r   r;   r   r   r"   r6   )$
__future__r   r+   ior   typingr   r   r   r   lxmlr	   unstructured.chunkingr
   unstructured.documents.elementsr   r   r    unstructured.file_utils.encodingr   unstructured.file_utils.modelr   $unstructured.partition.common.commonr   r   &unstructured.partition.common.metadatar   r   unstructured.partition.textr   r   __annotations__XMLr4   r*   r8   rB   r5   r3   <module>r`      s   " " " " " " "        * * * * * * * * * * * *       7 7 7 7 7 7 J J J J J J J J J J : : : : : : 2 2 2 2 2 2        Z Y Y Y Y Y Y Y 9 9 9 9 9 9      = "= = = = =  =@8 8 8 8           r5   