
    Ng                    n   d Z ddlmZ ddlZddlmZ ddlmZmZ ddl	m
Z
 ddlmZmZmZmZ ddlmZ dd	lmZ  ed
           ed           ed           ed           ed           ed          gZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd%dZd Z d%dZ!d  Z"d! Z#d" Z$d# Z%d$ Z&dS )&z3Test-suite for `unstructured.partition.ppt` module.    )annotationsN)MockFixture)assert_round_trips_through_JSONexample_doc_path)chunk_by_title)ListItemNarrativeText	PageBreakTitle)partition_ppt)#UNSTRUCTURED_INCLUDE_DEBUG_METADATAzAdding a Bullet SlidetextzFind the bullet slide layoutz$Use _TextFrame.text for first bulletz5Use _TextFrame.add_paragraph() for subsequent bulletszHere is a lot of text!z Here is some text in a text box!c                     t          t          d                    } | t          k    sJ | D ]}|j        j        dk    sJ t
          rd | D             dhk    sJ d S d S )Nfake-power-point.pptc                &    h | ]}|j         j        S  )metadatadetection_origin.0elements     `/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/test_ppt.py	<setcomp>z3test_partition_ppt_from_filename.<locals>.<setcomp>   s    JJJg 1JJJ    pptx)r   r   EXPECTED_PPT_OUTPUTr   filenamer   )elementsr   s     r    test_partition_ppt_from_filenamer       s    -.DEEFFH***** C C(,BBBBBB* WJJJJJvhVVVVW WVVr   c                     t          j        t                    5  t          t	          d                     d d d            d S # 1 swxY w Y   d S )Nzdoesnt-exist.ppt)pytestraises
ValueErrorr   r   r   r   r   +test_partition_ppt_raises_with_missing_filer%   !   s    	z	"	" < <&'9::;;;< < < < < < < < < < < < < < < < < <s   AAAc                     t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   |t          k    sJ |D ]}|j        j        J d S )Nr   rbfileopenr   r   r   r   r   fr   r   s      r   test_partition_ppt_from_filer.   &   s    	566	=	= ) a((() ) ) ) ) ) ) ) ) ) ) ) ) ) )***** 1 1(00001 1   ;??c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   |t          k    sJ |D ]}|j        j        dk    sJ d S )Nr   r'   testr)   metadata_filenamer*   r,   s      r   3test_partition_ppt_from_file_with_metadata_filenamer4   .   s    	566	=	= C a6BBBC C C C C C C C C C C C C C C***** 3 3(F222223 3   <A A c                     t          d          } t          | d          5 }t          j        t                    5  t          | |           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr   r'   )r   r)   )r   r+   r"   r#   r$   r   )r   r-   s     r   -test_partition_ppt_raises_with_both_specifiedr7   6   s     677H	h		 1FM*$=$= 1 1xa00001 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1s4   A0AA0A	A0A	 A00A47A4c                     t          j        t                    5  t                       d d d            d S # 1 swxY w Y   d S N)r"   r#   r$   r   r   r   r   Dtest_partition_ppt_raises_when_neither_file_path_or_file_is_providedr:   <   s    	z	"	"                   s   6::c                     t          t          d                    } t          |           dk    sJ t          d | D                       sJ d S )Nr   r   c              3  6   K   | ]}|j         j        d k    V  dS )r   Nr   r   r   es     r   	<genexpr>zStest_partition_ppt_from_filename_gets_filename_from_filename_arg.<locals>.<genexpr>H   s-      OOqz"&<<OOOOOOr   r   r   lenallr   s    r   @test_partition_ppt_from_filename_gets_filename_from_filename_argrE   D   sY    -.DEEFFHx==1OOhOOOOOOOOOOr   c                     t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   t          |          dk    sJ t	          d |D                       sJ d S )Nr   r'   r(   r   c              3  2   K   | ]}|j         j        d u V  d S r9   r=   r>   s     r   r@   zBtest_partition_ppt_from_file_gets_filename_None.<locals>.<genexpr>P   s,      ==qqz"d*======r   )r+   r   r   rB   rC   r-   r   s     r   /test_partition_ppt_from_file_gets_filename_NonerI   K   s    	566	=	= ) a((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) x==1==H==========r/   c                     t          t          d          d          } t          |           dk    sJ t          d | D                       sJ d S )Nr   r1   )r3   r   c              3  6   K   | ]}|j         j        d k    V  dS r1   Nr=   r   s     r   r@   zMtest_partition_ppt_from_filename_prefers_metadata_filename.<locals>.<genexpr>W   s-      KKww(F2KKKKKKr   rA   rD   s    r   :test_partition_ppt_from_filename_prefers_metadata_filenamerM   S   s_    -.DEEY_```Hx==1KK(KKKKKKKKKKr   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   t          d |D                       sJ d S )Nr   r'   r1   r2   c              3  6   K   | ]}|j         j        d k    V  dS rL   r=   r>   s     r   r@   zItest_partition_ppt_from_file_prefers_metadata_filename.<locals>.<genexpr>^   s,      ??qz"f,??????r   r+   r   r   rC   rH   s     r   6test_partition_ppt_from_file_prefers_metadata_filenamerQ   Z   s    	566	=	= C a6BBBC C C C C C C C C C C C C C C ??h??????????r5   mockerr   c                    d|                      d           t          t          d                    }t          fd|D                       sJ d S )N2024-05-01T15:37:281unstructured.partition.ppt.get_last_modified_datereturn_valuer   c              3  8   K   | ]}|j         j        k    V  d S r9   r   last_modified)r   r?   filesystem_last_modifieds     r   r@   zWtest_partition_ppt_from_file_path_gets_last_modified_from_filesystem.<locals>.<genexpr>l   s.      VVqz'+CCVVVVVVr   patchr   r   rC   )rR   r   r[   s     @r   Dtest_partition_ppt_from_file_path_gets_last_modified_from_filesystemr^   d   sr    4
LL;Jb     -.DEEFFHVVVVXVVVVVVVVVVr   c                     t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   t          d |D                       sJ d S )Nr   r'   r(   c              3  2   K   | ]}|j         j        d u V  d S r9   rY   r>   s     r   r@   zGtest_partition_ppt_from_file_gets_last_modified_None.<locals>.<genexpr>s   s,      BBAqz'4/BBBBBBr   rP   rH   s     r   4test_partition_ppt_from_file_gets_last_modified_Nonera   o   s    	566	=	= ) a((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) BBBBBBBBBBBBr/   c                    d}d|                      d|           t          t          d                    }t          fd|D                       sJ d S )NrT   2020-07-05T09:24:28rU   rV   r   )metadata_last_modifiedc              3  8   K   | ]}|j         j        k    V  d S r9   rY   )r   r?   rd   s     r   r@   zStest_partition_ppt_from_file_path_prefers_metadata_last_modified.<locals>.<genexpr>   s.      TTaqz'+AATTTTTTr   r\   )rR   r[   r   rd   s      @r   @test_partition_ppt_from_file_path_prefers_metadata_last_modifiedrf   v   s    42
LL;Jb     /00I_  H TTTT8TTTTTTTTTTr   c                     d} t          t          d          d          5 }t          ||           }d d d            n# 1 swxY w Y   |d         j        j        | k    sJ d S )Nrc   r   r'   )r)   rd   r   )r+   r   r   r   rZ   )rd   r-   r   s      r   ;test_partition_ppt_from_file_prefers_metadata_last_modifiedrh      s    2	566	=	= X a@VWWWX X X X X X X X X X X X X X X A;-1GGGGGGGs   >AAc                 \    t          t          d                    } t          |            d S )Nr   )r   r   r   rD   s    r   test_partition_ppt_with_jsonrj      s-    -.DEEFFH#H-----r   c                     t          d          } t          |           }t          | d          }t          |          }||k    sJ ||k    sJ d S )Nr   by_title)chunking_strategy)r   r   r   )	file_pathr   chunk_elementschunkss       r   4test_add_chunking_strategy_by_title_on_partition_pptrq      sd     !788IY''H"9
KKKNH%%FX%%%%V######r   c                     t          t          d          dd          } | d         j        j        dgk    sJ t	          d | D                       sJ t	          d | D                       sJ dS )	zSIntegration test of params: languages, include_page_break, and include_slide_notes.language-docs/eng_spa_mult.pptT)include_page_breaksinclude_slide_notesr   engc              3  @   K   | ]}t          |t                    V  d S r9   )
isinstancer
   r   s     r   r@   z,test_partition_ppt_params.<locals>.<genexpr>   s,      FF'z'9--FFFFFFr   c              3  ,   K   | ]}|j         d k    V  dS )zThis is a slide note.Nr   r   s     r   r@   z,test_partition_ppt_params.<locals>.<genexpr>   s*      OO7w|66OOOOOOr   N)r   r   r   	languagesanyrD   s    r   test_partition_ppt_paramsr|      s    9::    H
 A;)eW4444FFXFFFFFFFFOOhOOOOOOOOOOr   c                     t          t          d          d          } d | D             }d | D             }d|v sJ d|v sJ d S )Nrs   T)detect_language_per_elementc                &    g | ]}|j         j        S r   r   rz   r   s     r   
<listcomp>zKtest_partition_ppt_respects_detect_language_per_element.<locals>.<listcomp>   s    @@@GW'@@@r   c                J    h | ] }|j         j        |j         j        d          !S )r   r   r   s     r   r   zJtest_partition_ppt_respects_detect_language_per_element.<locals>.<setcomp>   s.    aaawgFVF`aW'*aaar   rv   spa)r   r   )r   langss     r   7test_partition_ppt_respects_detect_language_per_elementr      so    9::X\  H A@x@@@E ba(aaaEE>>>>E>>>>>>r   )rR   r   )'__doc__
__future__r   r"   pytest_mockr   test_unstructured.unit_utilsr   r   unstructured.chunking.titler   unstructured.documents.elementsr   r	   r
   r   unstructured.partition.pptr   &unstructured.partition.utils.constantsr   r   r    r%   r.   r4   r7   r:   rE   rI   rM   rQ   r^   ra   rf   rh   rj   rq   r|   r   r   r   r   <module>r      sY   9 9 " " " " " "  # # # # # # Z Z Z Z Z Z Z Z 6 6 6 6 6 6 U U U U U U U U U U U U 4 4 4 4 4 4 V V V V V V 
E&'''H0111H8999HIJJJM/000M9::: W W W< < <
1 1 13 3 31 1 1  P P P> > >L L L@ @ @W W W WC C CU U U UH H H. . .
$ $ $
P 
P 
P	 	 	 	 	r   