
    Ng                        d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ 	 	 	 	 	 dddZdS )    )annotationsN)IOAnyOptional)Element)FileType)convert_office_docexactly_one)get_last_modified_date)partition_docxMS Word 2007 XMLfilenameOptional[str]fileOptional[IO[bytes]]metadata_filenamemetadata_last_modifiedlibre_office_filterkwargsr   returnlist[Element]c           	     Z   t          | |           | rt          |           nd}| 2t          j                            |           st          d|  d          t          j                    5 }|| dn| }|J |Ot          |d          5 }	|		                    |
                                           ddd           n# 1 swxY w Y   t          ||d|           t          j                            t          j                            |                    \  }
}t          j                            |          \  }}
t          j                            || d	          }t!          d||p| t"          j        |p|d
|}ddd           n# 1 swxY w Y   |r|D ]}||j        _        |S )a  Partitions Microsoft Word Documents in .doc format into its document elements.

    All parameters available on `partition_docx()` are also available here.

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    metadata_last_modified
        The last modified date for the document.
    libre_office_filter
        The filter to use when coverting to .doc. The default is the
        filter that is required when using LibreOffice7. Pass in None
        if you do not want to apply any filter.
    languages
        User defined value for `metadata.languages` if provided. Otherwise language is detected
        using naive Bayesian filter via `langdetect`. Multiple languages indicates text could be
        in either language.
        Additional Parameters:
            detect_language_per_element
                Detect language per element instead of at the document level.
    starting_page_number
        Indicates what page number should be assigned to the first page in the document.
        This information will be reflected in elements' metadata and can be be especially
        useful when partitioning a document that is part of a larger document.
    )r   r   Nz	The file z does not exist.z/document.docwbdocx)target_formattarget_filterz.docx)r   r   metadata_file_typer    )r
   r   ospathexists
ValueErrortempfileTemporaryDirectoryopenwritereadr	   splitabspathsplitextjoinr   r   DOCmetadatar   )r   r   r   r   r   r   last_modified
target_dirsource_file_pathf_filename_no_pathbase_filenametarget_file_pathelementselements                   V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/doc.pypartition_docr9      sf   H ----8@J*8444dM BGNN8$<$<?X???@@@ 
	$	&	& !
*;?;Kj7777QY+++ &-- %		$$$% % % % % % % % % % % % % % %
 	 -		
 	
 	
 	
 !gmmBGOO<L,M,MNN7++,<==q7<<
}4K4K4KLL
 " 
%/;8'|#9#J]	
 

 
 
7!
 !
 !
 !
 !
 !
 !
 !
 !
 !
 !
 !
 !
 !
 !
J  : 	: 	:G(9G%%Os7   , F(C 4F C	FC	B9FFF)NNNNr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   r   r#   typingr   r   r   unstructured.documents.elementsr   unstructured.file_utils.modelr   $unstructured.partition.common.commonr	   r
   &unstructured.partition.common.metadatar   unstructured.partition.docxr   r9   r       r8   <module>rB      s    " " " " " " 				  $ $ $ $ $ $ $ $ $ $ 3 3 3 3 3 3 2 2 2 2 2 2 P P P P P P P P I I I I I I 6 6 6 6 6 6 # $'+,0);Y Y Y Y Y Y YrA   