
    NgT                       d Z ddlmZ ddlmZmZ ddlZddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ d Zd Zd Z d Z!d Z"d Z#d Z$ej%        &                    di ddig          d*d            Z'd Z(d+dZ)d+dZ*d Z+d Z,ej%        &                    di d fd!did fd!d"id"fg          d,d'            Z-d( Z.d) Z/dS )-z3Test suite for `unstructured.partition.odt` module.    )annotations)AnyIteratorN)MockFixture)ANYFixtureRequestassert_round_trips_through_JSONexample_doc_pathmethod_mock)chunk_elements)CompositeElementElementTable
TableChunkTextTitle)partition_docx)partition_odt)#UNSTRUCTURED_INCLUDE_DEBUG_METADATAc                     t          d          } t          d          }t          |           t          |          k    sJ d S )N
simple.odtzsimple.docx)r
   r   r   )odt_file_pathdocx_file_paths     `/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/test_odt.py)test_partition_odt_matches_partition_docxr   !   sC    $\22M%m44N''>.+I+IIIIIII    c                     t          t          d                    } | t          d          t          d          gk    sJ t	          d | D                       sJ t
          rd | D             dhk    sJ d S d S )Nfake.odtLorem ipsum dolor sit amet.SHeader row Mon Wed Fri Color Blue Red Green Time 1pm 2pm 3pm Leader Sarah Mark Ryanc              3  6   K   | ]}|j         j        d k    V  dS )r   Nmetadatafilename.0es     r   	<genexpr>z3test_partition_odt_from_filename.<locals>.<genexpr>7   s,      CCQqz"j0CCCCCCr   c                &    h | ]}|j         j        S  )r#   detection_originr%   s     r   	<setcomp>z3test_partition_odt_from_filename.<locals>.<setcomp>:   s    >>>
+>>>r   docx)r   r
   r   r   allr   elementss    r    test_partition_odt_from_filenamer1   +   s    -j99::H+,,&	
 	
     CC(CCCCCCCC* K>>X>>>6(JJJJK KJJr   c                     t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   |t          d          t	          d          gk    sJ d S )Nr   rb)filer   r    )openr
   r   r   r   fr0   s     r   test_partition_odt_from_filer8   =   s    	z**D	1	1 )Q a((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) +,,&	
 	
      s   ;??c                     t          t          d                    } t          d | D                       s*J dt          | d         j        j                               d S )Nr   c              3  6   K   | ]}|j         j        d k    V  dS )r   Nr"   r%   s     r   r(   zktest_partition_odt_from_filename_gets_the_ODT_filename_in_metadata_not_the_DOCX_filename.<locals>.<genexpr>Q   s,      EEqqz"l2EEEEEEr   zGExpected all elements to have 'simple.odt' as their filename, but got: r   )r   r
   r.   reprr#   r$   r/   s    r   Xtest_partition_odt_from_filename_gets_the_ODT_filename_in_metadata_not_the_DOCX_filenamer<   O   sy    -l;;<<HEEHEEEEE  	2!%.//	2 	2    r   c                 x    t          t          d          d          } t          d | D                       sJ d S )Nr   test)metadata_filenamec              3  6   K   | ]}|j         j        d k    V  dS r>   Nr"   r%   s     r   r(   zJtest_partition_odt_from_filename_with_metadata_filename.<locals>.<genexpr>Y   ,      ??qz"f,??????r   r   r
   r.   r/   s    r   7test_partition_odt_from_filename_with_metadata_filenamerD   W   sF    -j99VTTTH??h??????????r   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   t          d |D                       sJ d S )Nr   r3   r>   )r4   r?   c              3  6   K   | ]}|j         j        d k    V  dS rA   r"   r%   s     r   r(   zFtest_partition_odt_from_file_with_metadata_filename.<locals>.<genexpr>_   rB   r   )r5   r
   r   r.   r6   s     r   3test_partition_odt_from_file_with_metadata_filenamerG   \   s    	z**D	1	1 CQ a6BBBC C C C C C C C C C C C C C C??h??????????   <A A c                     dt          t          d                    } t          fd| D                       s-J d dt          | d         j        j                               d S )Nz'application/vnd.oasis.opendocument.textr   c              3  8   K   | ]}|j         j        k    V  d S N)r#   filetype)r&   r'   ODT_MIME_TYPEs     r   r(   zQtest_partition_odt_gets_the_ODT_MIME_type_in_metadata_filetype.<locals>.<genexpr>h   s-      FFqz"m3FFFFFFr   zExpected all elements to have 'z' as their filetype, but got: r   )r   r
   r.   r;   r#   rL   )r0   rM   s    @r   >test_partition_odt_gets_the_ODT_MIME_type_in_metadata_filetyperN   e   s    =M-l;;<<HFFFFXFFFFF  	2- 	2 	2!%.//	2 	2    r   kwargsinfer_table_structureTdict[str, Any]c                (   t          t          d          d          5 }t          dd|i| }d d d            n# 1 swxY w Y   |d         }t          |t                    sJ |j        j        J |j        j                            d          sJ d S )Nr   r3   r4      z<table>r*   )r5   r
   r   
isinstancer   r#   text_as_html
startswith)rO   r7   r0   tables       r   Rtest_partition_odt_adds_text_as_html_when_infer_table_structure_is_omitted_or_TruerX   q   s     
z**D	1	1 3Q 22a26223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 QKEeU#####>&222>&11)<<<<<<<s   9= =c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   |d         }t          |t                    sJ |j        j        J d S )Nr   r3   F)r4   rP   rS   )r5   r
   r   rT   r   r#   rU   )r7   r0   rW   s      r   Ntest_partition_odt_suppresses_text_as_html_when_infer_table_structure_is_FalserZ   ~   s    	z**D	1	1 FQ auEEEF F F F F F F F F F F F F F F QKEeU#####>&.....rH   mockerr   c                    d|                      d           t          t          d                    }t          fd|D                       sJ d S )N2029-07-05T09:24:281unstructured.partition.odt.get_last_modified_datereturn_valuer   c              3  8   K   | ]}|j         j        k    V  d S rK   r#   last_modified)r&   r'   filesystem_last_modifieds     r   r(   zItest_partition_odt_pulls_last_modified_from_filesystem.<locals>.<genexpr>   s.      VVqz'+CCVVVVVVr   patchr   r
   r.   )r[   r0   rd   s     @r   6test_partition_odt_pulls_last_modified_from_filesystemrg      sq    4
LL;Jb     -j99::HVVVVXVVVVVVVVVVr   c                    d}d|                      d|           t          t          d                    }t          fd|D                       sJ d S )Nr]   z2020-07-05T09:24:28r^   r_   r   )metadata_last_modifiedc              3  8   K   | ]}|j         j        k    V  d S rK   rb   )r&   r'   ri   s     r   r(   zRtest_partition_odt_prefers_metadata_last_modified_when_provided.<locals>.<genexpr>   s.      TTaqz'+AATTTTTTr   re   )r[   rd   r0   ri   s      @r   ?test_partition_odt_prefers_metadata_last_modified_when_providedrk      s    42
LL;Jb     &&?U  H TTTT8TTTTTTTTTTr   c                 t    t          t          d                    } t          d | D                       sJ d S )Nr   c              3  8   K   | ]}|j         j        d gk    V  dS )engNr#   	languagesr%   s     r   r(   z=test_partition_odt_adds_languages_metadata.<locals>.<genexpr>   s.      AA1qz#w.AAAAAAr   rC   r/   s    r   *test_partition_odt_adds_languages_metadatarq      sA    -l;;<<HAAAAAAAAAAAAr   c                 |    t          t          d          d          } d | D             dgddgdgdgdggk    sJ d S )Nzlanguage-docs/eng_spa_mult.odtT)detect_language_per_elementc                &    g | ]}|j         j        S r*   ro   r%   s     r   
<listcomp>zOtest_partition_odt_respects_detect_language_per_element_arg.<locals>.<listcomp>   s    333QAJ 333r   rn   spa)r   r
   r/   s    r   ;test_partition_odt_respects_detect_language_per_element_argrw      sv    9::X\  H 43(333					8      r   )rO   expected_valuehi_resstrategyautorequestr   rx   
str | Nonec                    ddl m} dd}t          | |d|	          }t          t	          d
          fi |\  }|                    t                     |j        d| k    sJ d S )Nr   )_DocxPartitionerselfr   returnIterator[Element]c              3  F   K   t          d| j        j                   V  d S )Nstrategy == )r   _optsrz   )r   s    r   fake_iter_document_elementsz_test_partition_odt_forwards_strategy_arg_to_partition_docx.<locals>.fake_iter_document_elements   s.      7$*"5778888888r   _iter_document_elements)side_effectr   r   )r   r   r   r   )unstructured.partition.docxr   r   r   r
   assert_called_once_withr   text)r|   rO   rx   r   r   _iter_elements_elements          r   :test_partition_odt_forwards_strategy_arg_to_partition_docxr      s     =<<<<<9 9 9 9 "!/	  O /==HHHHJW++C000<:.::::::::r   c                 X    t          t          t          d                               dS )zCElements produced can be serialized then deserialized without loss.r   N)r	   r   r
   r*   r   r   +test_partition_odt_round_trips_through_jsonr      s'    #M2B<2P2P$Q$QRRRRRr   c                     t          d          } t          |           }t          | d          }t          d |D                       sJ |t          |          k    sJ d S )Nr   basic)chunking_strategyc              3  Z   K   | ]&}t          |t          t          t          f          V  'd S rK   )rT   r   r   r   )r&   cs     r   r(   zYtest_partition_odt_chunks_elements_when_chunking_strategy_is_specified.<locals>.<genexpr>   s4      TTz!.zBCCTTTTTTr   )r
   r   r.   r   )document_pathr0   chunkss      r   Ftest_partition_odt_chunks_elements_when_chunking_strategy_is_specifiedr      sr    $\22M]++H=GDDDF TTVTTTTTTTT^H--------r   )rO   rQ   )r[   r   )r|   r   rO   rQ   rx   r}   )0__doc__
__future__r   typingr   r   pytestpytest_mockr   test_unstructured.unit_utilsr   r   r	   r
   r   unstructured.chunking.basicr   unstructured.documents.elementsr   r   r   r   r   r   r   r   unstructured.partition.odtr   &unstructured.partition.utils.constantsr   r   r1   r8   r<   rD   rG   rN   markparametrizerX   rZ   rg   rk   rq   rw   r   r   r   r*   r   r   <module>r      s   : 9 " " " " " "                  # # # # # #              7 6 6 6 6 6                7 6 6 6 6 6 4 4 4 4 4 4 V V V V V VJ J JK K K$  $  @ @ @
@ @ @   B)@$(G#HII	= 	= 	= JI	=/ / /W W W WU U U U"B B B

 
 
   (^z4((3z66JF5ST ; ; ;	 ;*S S S
. . . . .r   