
    Ngɽ                    p   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlZd dlmZ d dlmZ d dlmZmZmZmZ d d	lmZmZmZmZmZmZ d d
lm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 ej9        :                    d          Z;d Z<d Z=ej>        ?                    dg d          dd            Z@ej>        A                    e;d          dd            ZBdd ZCdd!ZDej>        ?                    d"g d#          ej>        ?                    d$e4jE        e4jF        e4jG        e4jH        g          dd(                        ZI e'd)*           e+d+*           e&d,*           e&d-*          gZJd. ZKd/ ZLd0 ZMd1 ZNej>        ?                    dg d2          dd3            ZOej>        ?                    dg d2          dd4            ZPd5 ZQej>        ?                    dg d6          dd7            ZRej>        ?                    dg d6          dd8            ZSdd;ZTej>        ?                    d<d=d>g          dd?            ZUd@ ZVej>        W                    dAeXd>B          dC             ZYddDZZdE Z[dF Z\ddGZ]ddHZ^dI Z_dJ Z`ej>        ?                    dg dK          ddL            Zaej>        ?                    dg dK          ddM            ZbddNZcdO Zdej>        ?                    d<d=d>g          ddP            ZedQ ZfdR ZgdS Zhej>        ?                    d"dTdUg          ej>        ?                    d$e4jE        e4jF        e4jG        e4jH        g          ddV                        ZidW ZjdX ZkdY ZldZ Zmej>        ?                    d[d\ e'd]*           e"d^*           e+d+*           e&d_*           e&d`*           e&da*          gfdbg fg          dde            Zndf Zodg Zpdh Zqdi Zrdj Zsdk Ztdl Zudm Zvdn ZwddoZxdp Zydq Zzdr Z{dduZ|ddvZ}dw Z~dx Zdy Zej>        ?                    dzd{                                          dd|            Zd} Zd~ ZddZd ZddZej>        ?                    ddgdfdgdfg          dd            ZddZej>        ?                    ddgdd=fg dd>fdgdd=fg dd>fg          dd            Zej>        ?                    dg d          dd            Zej>        ?                    dddg          dd            Zej>        ?                    dde-j        fde-j        fde-j        fde-j        fde-j        fde-j        fde-j        fde-j        fde-j        fde-j        fdUe-j        fdTe-j        fde-j        fde-j        fde-j        fd\e-j        fde-j        fde-j        fde-j        fg          dd            Zd Zd Zd Zd Zd ZddZ ej                    d             ZdS )    )annotationsN)import_module)Iterator)patch)Image)assert_element_extraction)EXPECTED_TABLEEXPECTED_TABLE_XLSXEXPECTED_TEXTEXPECTED_XLS_TABLE)ANYFixtureRequestLogCaptureFixtureexample_doc_pathfunction_mockmethod_mock)clean_extra_whitespace)
AddressCompositeElementElementElementMetadataListItemNarrativeTextTable
TableChunkTextTitle)FileType)_PartitionerLoader	partition)UnsupportedFileFormatError)PartitionStrategy)elements_from_jsonelements_to_dictselements_to_jsonz/.dockerenvc                     t          t          d                    } t          | d         j                  t          k    sJ | d         j        j        t          k    sJ | d         j        j        dk    sJ d S )Nstanley-cups.csvr   text/csv	r    r   r   textr   metadatatext_as_htmlr	   filetypeelementss    a/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/test_auto.py%test_auto_partition_csv_from_filenamer1   <   u    )*<==>>H!(1+"233}DDDDA;,>>>>A;(J666666    c                 ~   t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   t          |d         j                  t
          k    sJ t          |d         t                    sJ |d         j        j	        t          k    sJ |d         j        j        dk    sJ d S )Nr'   rbfiler   r(   )openr   r    r   r*   r   
isinstancer   r+   r,   r	   r-   fr/   s     r0   !test_auto_partition_csv_from_filer<   D   s    	122D	9	9 %Q!$$$% % % % % % % % % % % % % % % "(1+"233}DDDDhqk5)))))A;,>>>>A;(J666666   ;??)pass_metadata_filenamecontent_type)FN)Fapplication/msword)TrA   TNr>   boolr?   
str | Noneexpected_docx_elementslist[Element]c                b   t          d          }| r|nd }t          |||t          j                  }|D ];}t	          t          |          j         dt          |j                   d           <||k    sJ t          d |D                       sJ t          d |D                       sJ d S )N
simple.docfilenamemetadata_filenamer?   strategy()c              3  6   K   | ]}|j         j        d k    V  dS )rH   Nr+   rJ   .0es     r0   	<genexpr>z8test_auto_partition_doc_from_filename.<locals>.<genexpr>h   s,      EEqqz"l2EEEEEEr3   c              3  P   K   | ]!}|j         j        t          d           k    V  "dS  Nr+   file_directoryr   rQ   s     r0   rT   z8test_auto_partition_doc_from_filename.<locals>.<genexpr>i   5      SSQqz(,<R,@,@@SSSSSSr3   )
r   r    r"   HI_RESprinttype__name__reprr*   all)r>   r?   rE   	file_pathrK   r/   rS   s          r0   %test_auto_partition_doc_from_filenamerb   S   s     !..I%;E		+!")	  H  5 5a!33DLL3334444-----EEHEEEEEEEESS(SSSSSSSSSSr3   z6Passes in CI but not Docker. Remove skip on #3364 fix.)reasonc                    t          t          d          d          5 }t          |          }d d d            n# 1 swxY w Y   || k    sJ d S )NrH   r5   r6   r8   r   r    rE   r;   r/   s      r0   !test_auto_partition_doc_from_filerg   l   s    	|,,d	3	3 %q!$$$% % % % % % % % % % % % % % % -------r=   c                    t          t          d          t          j                  }|| k    sJ t	          d |D                       sJ d S )Nsimple.docxrL   c              3  6   K   | ]}|j         j        d k    V  dS )ri   NrP   rQ   s     r0   rT   z9test_auto_partition_docx_from_filename.<locals>.<genexpr>}   s,      FFqz"m3FFFFFFr3   )r    r   r"   r[   r`   rE   r/   s     r0   &test_auto_partition_docx_from_filenamerm   y   s\    )-88CTC[\\\H-----FFXFFFFFFFFFFr3   c                    t          t          d          d          5 }t          |t          j                  }d d d            n# 1 swxY w Y   || k    sJ d S )Nri   r5   r7   rL   r8   r   r    r"   r[   rf   s      r0   "test_auto_partition_docx_from_filerq      s    	}--t	4	4 H!.?.FGGGH H H H H H H H H H H H H H H-------   AA
A
	file_name)ri   rH   
simple.odtrL   requestr   strc                    ddl m} dd}t          | |d|	          }t          t	          |          |
          \  }|                    t                     |j        d| k    sJ dS )a|  The `strategy` arg value received by `partition()` is received by `partition_docx().

    To do this in the brokering-partitioner cases (DOC, ODT) it must make its way to
    `partition_doc()` or `partition_odt()` which must then forward it to `partition_docx()`. This
    test makes sure it made it all the way.

    Note this is 3 file-types X 4 strategies = 12 test-cases.
    r   )_DocxPartitionerselfrx   returnIterator[Element]c              3  F   K   t          d| j        j                   V  d S N
strategy==r   _optsrL   ry   s    r0   fake_iter_document_elementszktest_partition_forwards_strategy_arg_to_partition_docx_and_its_brokers.<locals>.fake_iter_document_elements   .      5
 3556666666r3   _iter_document_elementsside_effectrj   r~   N)ry   rx   rz   r{   )unstructured.partition.docxrx   r   r    r   assert_called_once_withr   r*   )ru   rs   rL   rx   r   _iter_elements_elements          r0   Ftest_partition_forwards_strategy_arg_to_partition_docx_and_its_brokersr      s    * =<<<<<7 7 7 7 "!/	  O +I66JJJJW++C000<222222222r3   +This is a test email to use for unit tests.r*   Important points:Roses are redViolets are bluec                 ~   t          d          } t          | t          j                  }t	          |          dk    sJ |t
          k    sJ |d         j        j        t          j	        
                    |           k    sJ |d         j        j        t          j	                            |           d         k    sJ d S )Neml/fake-email.emlrj   r   )r   r    r"   r[   lenEXPECTED_EMAIL_OUTPUTr+   rJ   ospathbasenamerY   splitra   r/   s     r0   'test_auto_partition_email_from_filenamer      s     !566I->-EFFFHx==1,,,,,A;(BG,<,<Y,G,GGGGGA;."'--	2J2J12MMMMMMMr3   c                     t          t          d          d          5 } t          | t          j                  }d d d            n# 1 swxY w Y   t          |          dk    sJ |t          k    sJ d S )Nr   r5   ro   r   )r8   r   r    r"   r[   r   r   r:   s     r0   #test_auto_partition_email_from_filer      s    	344d	;	; Hq!.?.FGGGH H H H H H H H H H H H H H H x==1,,,,,,,rr   c                     t          t          d          t          j                  } t	          |           dk    sJ | d         j                            d          sJ d S )Nwinter-sports.epubrj   r   ,The Project Gutenberg eBook of Winter Sports)r    r   r"   r[   r   r*   
startswithr.   s    r0   &test_auto_partition_epub_from_filenamer      sa    )*>??J[JbcccHx==1A;&&'UVVVVVVVr3   c                    t          t          d          d          5 } t          | t          j                  }d d d            n# 1 swxY w Y   t          |          dk    sJ |d         j                            d          sJ d S )Nr   r5   ro   r   r   )r8   r   r    r"   r[   r   r*   r   r:   s     r0   "test_auto_partition_epub_from_filer      s    	344d	;	; Hq!.?.FGGGH H H H H H H H H H H H H H H x==1A;&&'UVVVVVVVrr   )r@   )F	text/html)Tr   rB   c                t   t          d          }| r|nd }t          |||t          j                  }|sJ t          j                            |          t          j                            |          d         ct          fd|D                       sJ t          fd|D                       sJ d S )Nexample-10k-1p.htmlrI   r   c              3  8   K   | ]}|j         j        k    V  d S NrP   )rR   rS   expected_filenames     r0   rT   z9test_auto_partition_html_from_filename.<locals>.<genexpr>   s.      JJAqz"&77JJJJJJr3   c              3  8   K   | ]}|j         j        k    V  d S r   )r+   rY   )rR   rS   expected_directorys     r0   rT   z9test_auto_partition_html_from_filename.<locals>.<genexpr>   s.      QQ1qz(,>>QQQQQQr3   )	r   r    r"   r[   r   r   r   r   r`   )r>   r?   ra   rK   r/   r   r   s        @@r0   &test_auto_partition_html_from_filenamer      s    
 !!677I%;E		+!")	  H OOO,.G,<,<Y,G,GW`IaIabcId))JJJJJJJJJJJJQQQQQQQQQQQQQQr3   c                    t          d          }| r|nd }t          |d          5 }t          |||t          j                  }d d d            n# 1 swxY w Y   t          |          dk    sJ d S )Nr   r5   r7   rK   r?   rL   r   )r   r8   r    r"   r[   r   )r>   r?   ra   rK   r;   r/   s         r0   "test_auto_partition_html_from_filer      s    
 !!677I%;E			i		 
!/%&-	
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 x==1   AAAc                    t          t          d                    } t          |           dk    sJ dd | D             vsJ t          | d         j                                      d          sJ t          | d         t                    sJ t          d | D                       sJ t          d | D                       sJ d S )Nfake-html-pre.htmr   	PageBreakc                    g | ]	}|j         
S  categoryrR   elems     r0   
<listcomp>z:test_auto_partition_html_pre_from_file.<locals>.<listcomp>  s    BBBt}BBBr3   z[107th Congress Public Law 56]c              3  6   K   | ]}|j         j        d k    V  dS )r   Nr+   r-   rQ   s     r0   rT   z9test_auto_partition_html_pre_from_file.<locals>.<genexpr>  s,      DDaqz"k1DDDDDDr3   c              3  6   K   | ]}|j         j        d k    V  dS )r   NrP   rQ   s     r0   rT   z9test_auto_partition_html_pre_from_file.<locals>.<genexpr>  s-      LLaqz"&99LLLLLLr3   )	r    r   r   r   r*   r   r9   r   r`   r.   s    r0   &test_auto_partition_html_pre_from_filer     s    )*=>>??Hx==1BBBBBBBBB!(1+"233>>?_`````hqk=11111DD8DDDDDDDDLL8LLLLLLLLLLr3   )r@   )F
image/jpeg)Tr   rB   c                    t          d          }| r|nd }t          |||t          j                  }|d         }|j        dk    sJ |j        j        J d S )N img/layout-parser-paper-fast.jpgrI      OLayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis)r   r    r"   AUTOr*   r+   coordinatesr>   r?   ra   rK   r/   rS   s         r0   &test_auto_partition_jpeg_from_filenamer     s    
 !!CDDI%;E		+!"'	  H 	A6Y    :!-----r3   c                    t          d          }| r|nd }t          |d          5 }t          |||t          j                  }d d d            n# 1 swxY w Y   |d         }|j        dk    sJ |j        j        J d S )Nr   r5   r   r   r   )r   r8   r    r"   r   r*   r+   r   r>   r?   ra   rK   r;   r/   rS   s          r0   "test_auto_partition_jpeg_from_filer   3  s    
 !!CDDI%;E			i		 
!/%&+	
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 	A6Y    :!-----r   tmp_pathpathlib.Pathc                n   t          | dz            }t          j        t          d                    5 }|                    |           d d d            n# 1 swxY w Y   t          |t          j                  }d |D             }t          |          dk    sJ d|d         v sJ d|d         v sJ d S )	Nzexample.bmpz&img/layout-parser-paper-with-table.jpgrJ   rL   c                >    g | ]}|j         j        |j         j        S r   )r+   r,   rQ   s     r0   r   z9test_auto_partition_bmp_from_filename.<locals>.<listcomp>Q  s'    RRR!*:QRQZ$RRRr3      z<table><thead><tr>r   z</thead><tbody><tr>)	rv   r   r8   r   saver    r"   r[   r   )r   bmp_filenameimgr/   tables        r0   %test_auto_partition_bmp_from_filenamer   J  s   x-/00L	$%MNN	O	O SV               ,9J9QRRRHRRhRRREu::????58++++ E!H,,,,,,s   AAAextract_image_block_to_payloadFTc                    ddg}t          j                    5 }t          t          d          || |          }t	          ||| |           d d d            d S # 1 swxY w Y   d S )Nr   r   zimg/embedded-images-tables.jpg)rJ   extract_image_block_typesr   extract_image_block_output_dirtempfileTemporaryDirectoryr    r   r   r   r   tmpdirr/   s       r0   ,test_auto_partition_image_element_extractionr   W  s    !(' 2		$	&	& 

&%&FGG&?+I+1	
 
 
 	"/1OQW	
 	
 	


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

   3AAAc            	        t          d          } d}t          |           5 }t          j        |          }ddd           n# 1 swxY w Y   t          j        t          t          t          |           |t          j	                                      }|D ]}|
                    d           |D ]}|
                    d           ||k    sJ dS )zBTest auto-processing an unstructured json output file by filename.zspring-weather.html.jsonzspring-weather.htmlN)rJ   rK   rL   r+   )r   r8   jsonloadloadsr%   r    rv   r"   r[   pop)json_file_pathoriginal_file_namejson_fexpected_resultpartitioning_resultr   s         r0   Mtest_auto_partitioned_json_output_maintains_consistency_with_fixture_elementsr   q  s.   %&@AAN.	n		 ,)F++, , , , , , , , , , , , , , , *^,,"4*1	  	
 	
	 	 $    1111111s   AA	Az~https://github.com/Unstructured-IO/unstructured/issues/3365 partition_json() does not preserve original element-id or metadata)rc   raisesstrictc                     t          d          } t          |           }t          | d          5 }t          |          }d d d            n# 1 swxY w Y   t	          |          t	          |          k    sJ d S )Nzsimple.jsonr5   r6   )r   r#   r8   r    r$   )ra   original_elementsr;   partitioned_elementss       r0   >test_auto_partition_json_from_file_preserves_original_elementsr     s     !//I*955	i		 1!(a0001 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1226GHY6Z6ZZZZZZZs   AAAc                0   d}t          | dz            }t          |d          5 }|                    |           d d d            n# 1 swxY w Y   t          j        t
          d          5  t          |           d d d            d S # 1 swxY w Y   d S )Nz{"hi": "there"}zunprocessable.jsonwz6Detected a JSON file that does not conform to the Unstmatch)rJ   )rv   r8   writepytestr   
ValueErrorr    )r   r*   ra   r;   s       r0   7test_auto_partition_json_raises_with_unprocessable_jsonr     s    DH3344I	i		 	               
z)a	b	b	b & &9%%%%& & & & & & & & & & & & & & & & & &s#   AAA-BBBc                 d    d} t          | dt          j                  }d|d         j        v sJ d S )NzMhttps://raw.githubusercontent.com/Unstructured-IO/unstructured/main/README.mdztext/markdownurlr?   rL   unstructuredr   )r    r"   r[   r*   )r   r/   s     r0   3test_partition_md_from_url_works_with_embedded_htmlr     s=    
YCSIZIabbbHXa[-------r3   c                     t          t          d          t          j                  t	          d          t          d          t          d          t          d          gk    sJ d S )Nfake-email.msgrj   r   r   r   r   r   )r    r   r"   r[   r   r   r   r   r3   r0   %test_auto_partition_msg_from_filenamer    s    %&677BSBZ[[[HIII&'''o&&&()))	`      r3   c                f    t          t          d          t          j                  }|| k    sJ d S )Nrt   rj   r    r   r"   r[   rl   s     r0   %test_auto_partition_odt_from_filenamer    s9    ),77BSBZ[[[H-------r3   c                    t          t          d          d          5 }t          |t          j                  }d d d            n# 1 swxY w Y   || k    sJ d S )Nrt   r5   ro   rp   rf   s      r0   !test_auto_partition_odt_from_filer    s    	|,,d	3	3 Hq!.?.FGGGH H H H H H H H H H H H H H H -------rr   c                     t          t          d                    } | d         t          d          k    sJ | d         j        j        dk    sJ d S )N
README.orgr   Example Docstext/orgr    r   r   r+   r-   r.   s    r0   %test_auto_partition_org_from_filenamer    sV    ),7788HA;%//////A;(J666666r3   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   |d         t          d          k    sJ |d         j        j        dk    sJ d S )Nr  r5   r
  r7   r?   r   r	  r8   r   r    r   r+   r-   r:   s     r0   !test_auto_partition_org_from_filer    s    	|,,d	3	3 >q!*===> > > > > > > > > > > > > > > A;%//////A;(J666666   <A A )r@   )Fapplication/pdf)Tr  rB   c                   t          d          }| r|nd }t          |||t          j                  }|d         }t	          |t
                    sJ |j                            d          sJ |j        j	        t          j                            |          k    sJ |j        j        t          j                            |          d         k    sJ |d         }t	          |t                    sJ |j                            d          sJ d S )Npdf/chevron-page.pdfrI   r   eastern mediterraneanr      We’re investing)r   r    r"   r[   r9   r   r*   r   r+   rJ   r   r   r   rY   r   r   r   s         r0   %test_auto_partition_pdf_from_filenamer    s   
 !!788I%;E		+!")	  H 	Aa6455555:"'"2"29"="=====:$i(@(@(CCCCCAa'''''601111111r3   c                   t          d          }| r|nd }t          |d          5 }t          |||t          j                  }d d d            n# 1 swxY w Y   |d         }t          |t                    sJ |j                            d          sJ |d         }t          |t                    sJ |j                            d          sJ d S )Nr  r5   r   r   r  r   r  )
r   r8   r    r"   r[   r9   r   r*   r   r   r   s          r0   !test_auto_partition_pdf_from_filer    s&   
 !!788I%;E			i		 
!/%&-	
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 	Aa6455555Aa'''''601111111r   c                n   t          | dt          d          g          }t          | t          d|          }t	          d          }t          |t          j                   |                    t          t          j                   |                    |d d t          j        d d dddd d dd d	           d S )
N(unstructured.partition.pdf.partition_pdfzHello there!return_valueget pdf/layout-parser-paper-fast.pdfrj   Fr   )rJ   r7   r   rL   	languagesrK   include_page_breaksinfer_table_structureextract_images_in_pdfr   r   r   hi_res_model_namestarting_page_number)r   r   r   r   r   r    r"   FASTr   r   r   PDF)ru   partition_pdf_partitioner_loader_get_ra   s       r0   *test_auto_partition_pdf_with_fast_strategyr*    s    "2#N334  N
 *#U   !!CDDIi"3"8999933CFFF**"'!##"&'+', +     r3   c                     t          d          5 } t          t          d          dt          j                   | j        d         d         sJ 	 d d d            d S # 1 swxY w Y   d S )N:unstructured.partition.pdf_image.ocr.process_file_with_ocrr  T)pdf_infer_table_structurerL   r   r"  )r   r    r   r"   r[   	call_args)mock_process_file_with_models    r0   ?test_auto_partition_pdf_uses_pdf_infer_table_structure_argumentr0  ?  s    	D
 
 R	%?@@&*&-	
 	
 	
 	

 ,5a89PQQQQQR R R R R R R R R R R R R R R R R Rs   ?AA!$A!c                    ddg}t          j                    5 }t          t          d          || |          }t	          ||| |           d d d            d S # 1 swxY w Y   d S )Nr   r   zpdf/embedded-images-tables.pdf)r   r   r   r   r   s       r0   *test_auto_partition_pdf_element_extractionr2  K  s    !(' 2		$	&	& 

&=>>&?+I+1	
 
 
 	"/1OQW	
 	
 	


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

r   c                     t          j                    5  t          j        d           t          t	          d          t
          j                   d d d            d S # 1 swxY w Y   d S )Nerrorr  rj   )warningscatch_warningssimplefilterr    r   r"   r[   r   r3   r0   )test_partition_pdf_does_not_raise_warningr8  \  s    
 
	 	"	" 
 
g&&&?@@K\Kc	
 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   =AA"%A"c            	        t          d          } t          | t          j                  }|t	          d          t          d          t          d          t          d          t          d          t          d	          gk    sJ t          d
 |D                       sJ t          d |D                       sJ d S )Nfake-power-point.pptrj   Adding a Bullet Slider   Find the bullet slide layout$Use _TextFrame.text for first bullet5Use _TextFrame.add_paragraph() for subsequent bulletsHere is a lot of text! Here is some text in a text box!c              3  6   K   | ]}|j         j        d k    V  dS )r:  NrP   rQ   s     r0   rT   z8test_auto_partition_ppt_from_filename.<locals>.<genexpr>z  s-      OOqz"&<<OOOOOOr3   c              3  P   K   | ]!}|j         j        t          d           k    V  "dS rV   rX   rQ   s     r0   rT   z8test_auto_partition_ppt_from_filename.<locals>.<genexpr>{  rZ   r3   r   r    r"   r[   r   r   r   r`   r   s     r0   %test_auto_partition_ppt_from_filenamerD  m  s     !788I->-EFFFH*+++4555<===MNNN3444=>>>     OOhOOOOOOOOSS(SSSSSSSSSSr3   c            	        t          d          } t          | t          j                  }|t	          d          t          d          t          d          t          d          t          d          t          d	          gk    sJ t          d
 |D                       sJ t          d |D                       sJ d S )Nfake-power-point.pptxrj   r;  r   r<  r=  r>  r?  r@  c              3  6   K   | ]}|j         j        d k    V  dS )rF  NrP   rQ   s     r0   rT   z9test_auto_partition_pptx_from_filename.<locals>.<genexpr>  s-      PP!qz"&==PPPPPPr3   c              3  P   K   | ]!}|j         j        t          d           k    V  "dS rV   rX   rQ   s     r0   rT   z9test_auto_partition_pptx_from_filename.<locals>.<genexpr>  rZ   r3   rC  r   s     r0   &test_auto_partition_pptx_from_filenamerI    s     !899I->-EFFFH*+++4555<===MNNN3444=>>>     PPxPPPPPPPPSS(SSSSSSSSSSr3   zsimple.pptxr:  c                    ddl m} dd}t          | |d|	          }t          t	          |          |
          \  }|                    t                     |j        d| k    sJ dS )as  The `strategy` arg value received by `partition()` is received by `partition_pptx().

    To do this in the brokering-partitioner case (PPT) the strategy argument must make its way to
    `partition_ppt()` which must then forward it to `partition_pptx()`. This test makes sure it
    made it all the way.

    Note this is 2 file-types X 4 strategies = 8 test-cases.
    r   )_PptxPartitionerry   rK  rz   r{   c              3  F   K   t          d| j        j                   V  d S r}   r   r   s    r0   fake_iter_presentation_elementszotest_partition_forwards_strategy_arg_to_partition_pptx_and_its_brokers.<locals>.fake_iter_presentation_elements  r   r3   _iter_presentation_elementsr   rj   r~   N)ry   rK  rz   r{   )unstructured.partition.pptxrK  r   r    r   r   r   r*   )ru   rs   rL   rK  rM  r   r   s          r0   Ftest_partition_forwards_strategy_arg_to_partition_pptx_and_its_brokersrP    s    * =<<<<<7 7 7 7 "%3	  O +I66JJJJW++C000<222222222r3   c                     t          t          d                    } | d         t          d          k    sJ | d         j        j        dk    sJ d S )N
README.rstr   r	  
text/x-rstr  r.   s    r0   %test_auto_partition_rst_from_filenamerT    sV    ),7788HA;%//////A;(L888888r3   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   |d         t          d          k    sJ |d         j        j        dk    sJ d S )NrR  r5   rS  r  r   r	  r  r:   s     r0   !test_auto_partition_rst_from_filerV    s    	|,,d	3	3 @q!,???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ A;%//////A;(L888888r  c                     t          t          d          t          j                  } | d         t	          d          k    sJ d S )Nfake-doc.rtfrj   r   zMy First Heading)r    r   r"   r[   r   r.   s    r0   %test_auto_partition_rtf_from_filenamerY    sE    ).99DUD\]]]HA;% 233333333r3   c                     t          t          d                    } t          | d         j                  t          k    sJ | d         j        j        t          k    sJ | d         j        j        dk    sJ d S )Nstanley-cups.tsvr   ztext/tsvr)   r.   s    r0   %test_auto_partition_tsv_from_filenamer\    r2   r3   )rJ   expected_elementsfake-text.txt.This is a test document to use for unit tests.Doylestown, PA 18901Hamburgers are deliciousDogs are the bestI love fuzzy blanketszfake-text-all-whitespace.txtrJ   r]  c                     t                     }t          |t          j                  }||k    sJ t	           fd|D                       sJ t	          d |D                       sJ d S )Nr   c              3  8   K   | ]}|j         j        k    V  d S r   rP   )rR   rS   rJ   s     r0   rT   z9test_auto_partition_text_from_filename.<locals>.<genexpr>   s-      AA1qz"h.AAAAAAr3   c              3  P   K   | ]!}|j         j        t          d           k    V  "dS rV   rX   rQ   s     r0   rT   z9test_auto_partition_text_from_filename.<locals>.<genexpr>  rZ   r3   )r   r    r"   r[   r`   )rJ   r]  ra   r/   s   `   r0   &test_auto_partition_text_from_filenamerg    s    $ !**I)6G6NOOOH(((((AAAAAAAAAAAASS(SSSSSSSSSSr3   c            	        t          t          d          d          5 } t          | t          j                  }d d d            n# 1 swxY w Y   t          |          dk    sJ |t          d          t          d          t          d          t          d	          t          d
          t          d          gk    sJ d S )Nr^  r5   ro   r   r_  r   r`  r   ra  rb  rc  )
r8   r   r    r"   r[   r   r   r   r   r   r:   s     r0   "test_auto_partition_text_from_fileri    s,   	//	6	6 H!!.?.FGGGH H H H H H H H H H H H H H H x==1KLLL+,,,&'''0111)***-...      rr   c                 &   t          t          d          dg           } t          |           dk    sJ t          d | D                       dk    sJ | d         j        j        t          k    sJ t          | d         j                  dk    sJ d S )	Ntests-example.xlsFinclude_headerskip_infer_table_types   c              3  @   K   | ]}t          |t                    V  d S r   r9   r   rQ   s     r0   rT   z8test_auto_partition_xls_from_filename.<locals>.<genexpr>  ,      66z!U##666666r3   r   r   i  )r    r   r   sumr+   r,   r   r*   r.   s    r0   %test_auto_partition_xls_from_filenamert    s    ,--e\^  H x==B66X66666!;;;;A;,0BBBBBx{  C''''''r3   c                 f   t          t          d          dg           } t          |           dk    sJ t          d | D                       dk    sJ t          d | D                       dk    sJ t	          | d         j                  d	k    sJ t	          | d
         j                  dk    sJ | d
         j        j        t          k    sJ t          d | d d         D                       sJ t          d | dd          D                       sJ t          d | D                       sJ d S )Nstanley-cups.xlsxFrl     c              3  @   K   | ]}t          |t                    V  d S r   rq  rQ   s     r0   rT   z9test_auto_partition_xlsx_from_filename.<locals>.<genexpr>.  rr  r3   r   c              3  @   K   | ]}t          |t                    V  d S r   r9   r   rQ   s     r0   rT   z9test_auto_partition_xlsx_from_filename.<locals>.<genexpr>/  rr  r3   r   Stanley Cupsr   FTeam Location Stanley Cups Blues STL 1 Flyers PHI 2 Maple Leafs TOR 13c              3  6   K   | ]}|j         j        d k    V  dS r   Nr+   page_numberrQ   s     r0   rT   z9test_auto_partition_xlsx_from_filename.<locals>.<genexpr>5  ,      AAqqz%*AAAAAAr3   c              3  6   K   | ]}|j         j        d k    V  dS r   Nr  rQ   s     r0   rT   z9test_auto_partition_xlsx_from_filename.<locals>.<genexpr>6  r  r3   c              3  6   K   | ]}|j         j        d k    V  dS zAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetNr   rQ   s     r0   rT   z9test_auto_partition_xlsx_from_filename.<locals>.<genexpr>7  A         	

bb     r3   )
r    r   r   rs  r   r*   r+   r,   r
   r`   r.   s    r0   &test_auto_partition_xlsx_from_filenamer  (  s   ,--e\^  H x==A66X66666!;;;;66X66666!;;;;!(1+"233~EEEE!(1+"233P    A;,0CCCCCAAHRaRLAAAAAAAAAAHQRRLAAAAAAAA           r3   c                    t          t          d          d          5 } t          | dg           }d d d            n# 1 swxY w Y   t          |          dk    sJ t	          d |D                       dk    sJ t	          d |D                       dk    sJ t          |d	         j                  d
k    sJ t          |d         j                  dk    sJ |d         j        j        t          k    sJ t          d |d d         D                       sJ t          d |dd          D                       sJ t          d |D                       sJ d S )Nrv  r5   F)r7   rm  rn  rw  c              3  @   K   | ]}t          |t                    V  d S r   rq  rR   r   s     r0   rT   z5test_auto_partition_xlsx_from_file.<locals>.<genexpr>B  ,      BBgz'5))BBBBBBr3   r   c              3  @   K   | ]}t          |t                    V  d S r   rz  r  s     r0   rT   z5test_auto_partition_xlsx_from_file.<locals>.<genexpr>C  r  r3   r   r{  r   r|  c              3  6   K   | ]}|j         j        d k    V  dS r~  r  rQ   s     r0   rT   z5test_auto_partition_xlsx_from_file.<locals>.<genexpr>I  r  r3   c              3  6   K   | ]}|j         j        d k    V  dS r  r  rQ   s     r0   rT   z5test_auto_partition_xlsx_from_file.<locals>.<genexpr>J  r  r3   c              3  6   K   | ]}|j         j        d k    V  dS r  r   rQ   s     r0   rT   z5test_auto_partition_xlsx_from_file.<locals>.<genexpr>K  r  r3   )r8   r   r    r   rs  r   r*   r+   r,   r
   r`   r:   s     r0   "test_auto_partition_xlsx_from_filer  =  s   	233T	:	: Va!ERTUUUV V V V V V V V V V V V V V V x==ABBBBBBBaGGGGBBBBBBBaGGGG!(1+"233~EEEE!(1+"233P    A;,0CCCCCAAHRaRLAAAAAAAAAAHQRRLAAAAAAAA           s   =AAc                     t          t          d          d          } t          d | d d         D                       sJ t          d | dd          D                       sJ d S )Nrv     )r%  c              3  6   K   | ]}|j         j        d k    V  dS )r  Nr  rQ   s     r0   rT   zRtest_auto_partition_xlsx_respects_starting_page_number_argument.<locals>.<genexpr>S  r  r3   r   c              3  6   K   | ]}|j         j        d k    V  dS )rw  Nr  rQ   s     r0   rT   zRtest_auto_partition_xlsx_respects_starting_page_number_argument.<locals>.<genexpr>T  r  r3   r    r   r`   r.   s    r0   ?test_auto_partition_xlsx_respects_starting_page_number_argumentr  Q  s{    )*=>>UVWWWHAAHRaRLAAAAAAAAAAHQRRLAAAAAAAAAAr3   c                     t          t          d          d          } | d         j        dk    sJ t          d | D                       sJ d S )Nfactbook.xmlFxml_keep_tagsr   United Statesc              3  6   K   | ]}|j         j        d k    V  dS )r  NrP   rQ   s     r0   rT   z8test_auto_partition_xml_from_filename.<locals>.<genexpr>`  s,      GGqz"n4GGGGGGr3   )r    r   r*   r`   r.   s    r0   %test_auto_partition_xml_from_filenamer  \  s^    ).99OOOHA;....GGhGGGGGGGGGGr3   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   |d         j        dk    sJ d S )Nr  r5   Fr7   r  r   r  r8   r   r    r*   r:   s     r0   !test_auto_partition_xml_from_filer  c  s    	~..	5	5 :!5999: : : : : : : : : : : : : : : A;......r  c                     t          t          d          d          } d| d         j        v sJ | d         j        j        dk    sJ d S )Nr  Tr  <leader>Joe Biden</leader>r   )r    r   r*   r+   rJ   r.   s    r0   /test_auto_partition_xml_from_filename_with_tagsr  j  sV    ).99NNNH'8A;+;;;;;A;(N::::::r3   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   d|d         j        v sJ d S )Nr  r5   Tr  r  r   r  r:   s     r0   +test_auto_partition_xml_from_file_with_tagsr  q  s    	~..	5	5 9!48889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 (8A;+;;;;;;;r  c                   t          | dt          j                  }t          j        t
          d          5  t          dt          j                   d d d            n# 1 swxY w Y   |	                    dd d d d            d S )Nz+unstructured.partition.auto.detect_filetyper  zQInvalid file made-up.fake. The FileType.UNK file type is not supported in partitir   zmade-up.faker   )ra   r7   encodingr?   metadata_file_path)
r   r   UNKr   r   r!   r    r"   r[   r   )ru   detect_filetype_s     r0   (test_auto_partition_raises_with_bad_typer  }  s    $>X\   
"a
 
 
 N N 	>4E4LMMMM	N N N N N N N N N N N N N N N ,,  -     s   A  A$'A$c                     dt          dt          j                  } | d         t          d          k    sJ t	          fd| D                       sJ d S )NNhttps://raw.githubusercontent.com/Unstructured-IO/unstructured/main/LICENSE.mdz
text/plainr   r   Apache Licensec              3  8   K   | ]}|j         j        k    V  d S r   r+   r   rR   rS   r   s     r0   rT   z/test_auto_partition_from_url.<locals>.<genexpr>  ,      77qz~$777777r3   r    r"   r[   r   r`   r/   r   s    @r0   test_auto_partition_from_urlr    sm    
ZCS|FWF^___HA;% 01111117777h7777777777r3   c                     dt          dt          j                  } | d         t          d          k    sJ t	          fd| D                       sJ d S )Nr  ztext/plain; charset=utf-8r   r   r  c              3  8   K   | ]}|j         j        k    V  d S r   r  r  s     r0   rT   zItest_auto_partition_from_url_with_rfc9110_content_type.<locals>.<genexpr>  r  r3   r  r  s    @r0   6test_auto_partition_from_url_with_rfc9110_content_typer    sw    
ZC9DUD\  H A;% 01111117777h7777777777r3   c                     dt          t          j                  } | d         t          d          k    sJ t	          fd| D                       sJ d S )Nr  )r   rL   r   r  c              3  8   K   | ]}|j         j        k    V  d S r   r  r  s     r0   rT   zNtest_auto_partition_from_url_without_providing_content_type.<locals>.<genexpr>  r  r3   r  r  s    @r0   ;test_auto_partition_from_url_without_providing_content_typer    sk    
ZCS+<+CDDDHA;% 01111117777h7777777777r3   caplogr   c                    t          t          d          ddit          j                   | j        d         j        dk    sJ d| j        v sJ d S )Nr   Acceptr  )headersrL   r   WARNINGzGheaders kwarg is set but the url kwarg is not. The headers kwarg will br    r   r"   r[   records	levelnamer*   r  s    r0   3test_auto_partition_warns_if_header_set_and_not_urlr    sl    -..,-")    >!&)3333TX^Xcccccccr3   c                    t          | dt          d                    }t          j        t          d          5  t	          dd           d d d            n# 1 swxY w Y   |                    dd i dd	           d S )
Nz2unstructured.partition.auto.file_and_type_from_urlzTrouble on the wire ...r   r   zhttp://eie.ioiF  )r   request_timeoutT)r   r?   r  
ssl_verifyr  )r   ConnectionErrorr   r   r    r   )ru   file_and_type_from_url_s     r0   ;test_auto_partition_from_url_routes_timeout_to_HTTP_requestr    s    +<#$=>>   
.G	H	H	H < <os;;;;< < < < < < < < < < < < < < < 33$t]` 4     s   AA Ac                 x    t          t          d          d          } t          d | D                       sJ d S )Nr   by_title)chunking_strategyc              3  Z   K   | ]&}t          |t          t          t          f          V  'd S r   )r9   r   r   r   rR   chunks     r0   rT   zLtest_auto_partition_forwards_chunking_strategy_via_kwargs.<locals>.<genexpr>  s4      \\Ez%"2E:!FGG\\\\\\r3   r  chunkss    r0   9test_auto_partition_forwards_chunking_strategy_via_kwargsr    sI    '(=>>R\]]]F\\U[\\\\\\\\\\r3   c                 z    t          t          d          dd          } t          d | D                       sJ d S )Nr   r     )r  max_charactersc              3  F   K   | ]}t          |j                  d k    V  dS )r  N)r   r*   r  s     r0   rT   zItest_auto_partition_forwards_max_characters_via_kwargs.<locals>.<genexpr>  s/      ::%s5:#%::::::r3   r  r  s    r0   6test_auto_partition_forwards_max_characters_via_kwargsr    sS    .//$  F
 ::6::::::::::r3   c                     t          t          d          d          } d | D             }|dgddgdgdgdggk    sJ d S )Nzlanguage-docs/eng_spa_mult.txtTdetect_language_per_elementc                &    g | ]}|j         j        S r   r+   r   r  s     r0   r   zPtest_auto_partition_respects_detect_language_per_element_arg.<locals>.<listcomp>  s    @@@GW'@@@r3   engspar    r   )r/   langss     r0   <test_auto_partition_respects_detect_language_per_element_argr    si    9::X\  H A@x@@@EeWuenugwHHHHHHHr3   file_extensionz:doc docx eml epub html md odt org ppt pptx rst rtf txt xmlc                    t          t          d|            dg          }t          d |D                       sJ d S )Nzlanguage-docs/eng_spa_mult.deu)r   c              3  8   K   | ]}|j         j        d gk    V  dS )r  Nr  r  s     r0   rT   z<test_auto_partition_respects_language_arg.<locals>.<genexpr>  s/      MMw)eW4MMMMMMr3   r  )r  r/   s     r0   )test_auto_partition_respects_language_argr    s]     G~GGHHUZT[  H MMHMMMMMMMMMMr3   c                 x    t          t          d          dt          j                  } dd | D             v sJ d S )Nr  T)r!  rL   r   c                    g | ]	}|j         
S r   r   r   s     r0   r   zUtest_auto_partition_forwards_include_page_breaks_to_partition_pdf.<locals>.<listcomp>  s    >>>T4=>>>r3   r  r.   s    r0   Atest_auto_partition_forwards_include_page_breaks_to_partition_pdfr    sS    ;<< ")  H
 >>X>>>>>>>>>r3   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   t          d |D                       sJ d S )Nr^  r5   much-more-interesting-name.txt)r7   rK   c              3  6   K   | ]}|j         j        d k    V  dS )r  NrP   rQ   s     r0   rT   zLtest_auto_partition_forwards_metadata_filename_via_kwargs.<locals>.<genexpr>  s-      YY1qz"&FFYYYYYYr3   )r8   r   r    r`   r:   s     r0   9test_auto_partition_forwards_metadata_filename_via_kwargsr  	  s    	//	6	6 Y!!7WXXXY Y Y Y Y Y Y Y Y Y Y Y Y Y Y YYPXYYYYYYYYYYr  c                   t          d          }t          |d          5 }t          ||          }d d d            n# 1 swxY w Y   t          d |D                       sJ | j        d         j        dk    sJ d| j        v sJ d S )Nr^  r5   )r7   file_filenamec              3  6   K   | ]}|j         j        d k    V  dS )r^  NrP   rQ   s     r0   rT   zLtest_auto_partition_warns_about_file_filename_deprecation.<locals>.<genexpr>  s,      HH!qz"o5HHHHHHr3   r   r  z*The file_filename kwarg will be deprecated)r   r8   r    r`   r  r  r*   )r  ra   r;   r/   s       r0   9test_auto_partition_warns_about_file_filename_deprecationr    s     11I	i		 >!!9===> > > > > > > > > > > > > > > HHxHHHHHHHH>!&)333376;FFFFFFs   >AAc                 L   t          d          } t          | d          5 }t          j        |                                          }d d d            n# 1 swxY w Y   t          j        t          d          5  t          || |            d d d            d S # 1 swxY w Y   d S )Nr^  r5   z6Only one of metadata_filename and file_filename is sper   )r7   r  rK   )	r   r8   ioBytesIOreadr   r   r   r    )ra   r;   r7   s      r0   Vtest_auto_partition_raises_when_both_file_filename_and_metadata_filename_args_are_usedr    s-    11I	i		 $!z!&&((##$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ 
z)a	b	b	b S St9	RRRRS S S S S S S S S S S S S S S S S Ss#   'AAA9BB Bc                    t          | d          }t          t          d          t          j        dg           |j        d         d         }|d         dk    sJ d S )	Nr,  zimg/chi_sim_image.jpegzhrL   r   r   r   ocr_languagesz)chi_sim+chi_sim_vert+chi_tra+chi_tra_vert)r   r    r   r"   r[   call_args_list)ru   process_file_with_ocr_call_kwargss      r0   9test_auto_partition_image_formats_languages_for_tesseractr  '  sy    *M  122")&    )7:1=K'+VVVVVVVr3   )r   r  autorW   r  r   	list[str]r  c                    t          t          d          t          j        ||           }t	          d |D                       sJ d S )Nzbook-war-and-peace-1p.txt)rL   r  r   c              3  8   K   | ]}|j         j        d gk    V  dS r  Nr  rQ   s     r0   rT   zMtest_auto_partition_ignores_empty_string_for_ocr_languages.<locals>.<genexpr>@  .      AA1qz#w.AAAAAAr3   r    r   r"   OCR_ONLYr`   )r   r  r/   s      r0   :test_auto_partition_ignores_empty_string_for_ocr_languagesr
  6  s[     455"+#	  H AAAAAAAAAAAAr3   c                    t          t          d          t          j        d           | j        d         j        dk    sJ d| j        v sJ d S )Nr  r  )rL   r  r   r  z*The ocr_languages kwarg will be deprecatedr  r  s    r0   ,test_auto_partition_warns_with_ocr_languagesr  C  sd    /00")    >!&)333376;FFFFFFr3   )rn  rJ   has_text_as_htmlxlsxrv  odtzfake.odtrn  r  c                    t          t          |          d          5 }t          ||           }d d d            n# 1 swxY w Y   d |D             }|sJ |D ]}|j        j        d u|k    sJ d S )Nr5   )r7   rn  c                <    g | ]}t          |t                    |S r   rq  rQ   s     r0   r   zGtest_auto_partition_respects_skip_infer_table_types.<locals>.<listcomp>`  s'    BBBAZ5-A-ABaBBBr3   )r8   r   r    r+   r,   )rn  rJ   r  r;   r/   table_elementsrS   s          r0   3test_auto_partition_respects_skip_infer_table_typesr  Q  s     
x(($	/	/ T1!<RSSST T T T T T T T T T T T T T T CBBBBN I I
't38HHHHHHI Ir  )r?   	shortnameexpected_value))r(   csvr(   )r   htmlr   )jdsfjdfsjkdspdfNr  r  c                h   t          | d| d| t          d          t          d          g          }t          | t          d|          }t	          t          d          |          }|                                 t          |          d	k    sJ t          fd
|D                       sJ d S )Nzunstructured.partition.z.partition_text 1text 2r  r  r  r?   r   c              3  8   K   | ]}|j         j        k    V  d S r   r   )rR   rS   r  s     r0   rT   z@test_auto_partition_adds_filetype_to_metadata.<locals>.<genexpr>  s-      GGqz"n4GGGGGGr3   )	r   r   r   r   r    r   assert_called_oncer   r`   )ru   r?   r  r  partition_fn_r)  r/   s      `   r0   -test_auto_partition_adds_filetype_to_metadatar!  m  s     "C)CC	CC8nnd8nn5  M
 *#U   ;<<<  H ..000x==AGGGGhGGGGGGGGGGr3   r  c           	        t          d          }t          | dt          d|          t          d|          g          }t          | t          d|          }t          t          d	          |
          }|                    t          t          j
                   t          |          dk    sJ t          d |D                       sJ d S )Nimapdf)r-   r  r  )r+   r  r  r  r  r  r   c              3  6   K   | ]}|j         j        d k    V  dS )r  Nr   rQ   s     r0   rT   zctest_auto_partition_overwrites_any_filetype_applied_by_file_specific_partitioner.<locals>.<genexpr>  s-      JJAqz"&77JJJJJJr3   )r   r   r   r   r   r    r   r   r   r   r'  r   r`   )ru   r?   r+   r(  r)  r/   s         r0   Ptest_auto_partition_overwrites_any_filetype_applied_by_file_specific_partitionerr%    s     111H"28h777hQY9Z9Z9Z[  N
 *#U   ;<<<  H 33CFFFx==AJJJJJJJJJJJJr3   )rs   	file_typer'   rH   ri   zfake-email.emlzsimple.epubzfake-html.htmlz	README.mdr   rt   zpdf/DA-1p.pdfrR  rX  r[  rk  r  r&  r   c                    t          |           }j        }t          j                  }t	          ||          } ||d          }|sJ t          fd|D                       sJ d S )NF)process_attachmentsc              3  Z   K   | ]%}|j         j        |j         j        j        k    V  &d S r   )r+   r-   	mime_type)rR   rS   r&  s     r0   rT   zUtest_auto_partition_applies_the_correct_filetype_for_all_filetypes.<locals>.<genexpr>  sG        :* 	

y22**** r3   )r   partitioner_function_namer   partitioner_module_qnamegetattrr`   )rs   r&  ra   partition_fn_namemodulepartition_fnr/   s    `     r0   Btest_auto_partition_applies_the_correct_filetype_for_all_filetypesr1    s    6 !++I!;9=>>F6#455L |I5AAAHOOO             r3   c                     t          t          d          t          j        dg          } t	          d | D                       sJ d S )Nr  r  r  c              3  8   K   | ]}|j         j        d gk    V  dS r  r  rQ   s     r0   rT   zPtest_auto_partition_passes_user_provided_languages_arg_to_PDF.<locals>.<genexpr>  r  r3   r  r.   s    r0   =test_auto_partition_passes_user_provided_languages_arg_to_PDFr4    sX    /00"+'  H
 AAAAAAAAAAAAr3   c                 x    t          t          d          d          } t          d | D                       sJ d S )Nzhandbook-1p.docxTr  c              3  D   K   | ]}|j         j        |j        dk    V  d S )NrW   )r+   r   r*   rQ   s     r0   rT   zVtest_auto_partition_languages_argument_default_to_None_when_omitted.<locals>.<genexpr>  s2      NN1E1Mqv|1M1M1M1MNNr3   r  r.   s    r0   Ctest_auto_partition_languages_argument_default_to_None_when_omittedr7    sH    )*<==[_```HNNXNNNNNNNNNNr3   c                    ddl m}  t          d          } | |          }|d         j        j        dgk    sJ t          |          }|d         j        j        dgk    sJ |d         j        j        |d         j        j        k    sJ dS )zX`partition()` ["eng"] default does not overwrite ["auto"] default in other partitioners.r   )partition_textz(language-docs/UDHR_first_article_all.txtr  N)unstructured.partition.textr9  r   r+   r   r    )r9  ra   text_elementsauto_elementss       r0   =test_auto_partition_default_does_not_overwrite_other_defaultsr=    s     ;::::: !!KLLI"N9--M$.5'9999i((M$.5'9999$.-2B2K2UUUUUUUr3   c                 J    t          t          d                    g k    sJ d S )N	empty.txtr  r   r3   r0   5test_auto_partition_from_filename_works_on_empty_filer@    s*    %k2233r999999r3   c                     t          t          d          d          5 } t          |           g k    sJ 	 d d d            d S # 1 swxY w Y   d S )Nr?  r5   r6   re   )r;   s    r0   1test_auto_partition_from_file_works_on_empty_filerB    s    	{++T	2	2 'aa   B&&&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 's   AA
Ac                J   t           j                            t          j        d            t          | dd          }d}t          j        t          |          5  t          t          d                     d d d            n# 1 swxY w Y   |                    d           d S )Nz-unstructured.partition.auto.dependency_existsFr  zUpartition_pdf\(\) is not available because one or more dependencies are not installedr   r  	pdf2image)r   _partitionersr   r   r'  r   r   r   ImportErrorr    r   r   )ru   dependency_exists_r   s      r0   Stest_auto_partition_that_requires_extras_raises_when_dependencies_are_not_installedrH    s     $((t<<<&@u   eE	{%	0	0	0 H H"#EFFGGGH H H H H H H H H H H H H H H ..{;;;;;s   BB
Bc            
         t          d          t          d          t          d          t          d          t          d          t          d          t          d          t	          d          gS )	Nz&These are a few of my favorite things:ParrotsHockeyAnalysisz4This is my first thought. This is my second thought.zThis is my third thought.2023zDOYLESTOWN, PA 18901)r   r   r   r   r   r   r3   r0   rE   rE     sh     	677jLMM122V&''	 	r3   )r>   rC   r?   rD   rE   rF   )rE   rF   )ru   r   rs   rv   rL   rv   )r>   rC   r?   rD   )r   r   )r   rC   )ru   r   )rJ   rv   r]  rF   )r  r   )r  rv   )r   r  r  rv   )rn  r  rJ   rv   r  rC   )ru   r   r?   rv   r  rv   r  rD   )ru   r   r?   rD   )rs   rv   r&  r   )
__future__r   r  r   r   pathlibr   r5  	importlibr   typingr   unittest.mockr   r   PILr   .test_unstructured.partition.pdf_image.test_pdfr   *test_unstructured.partition.test_constantsr	   r
   r   r   test_unstructured.unit_utilsr   r   r   r   r   r   unstructured.cleaners.corer   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   unstructured.file_utils.modelr   unstructured.partition.autor   r    unstructured.partition.commonr!   &unstructured.partition.utils.constantsr"   unstructured.staging.baser#   r$   r%   r   existsis_in_dockerr1   r<   markparametrizerb   skipifrg   rm   rq   r   r&  r[   r	  r   r   r   r   r   r   r   r   r   r   r   r   r   r   xfailAssertionErrorr   r   r   r  r  r  r  r  r  r  r*  r0  r2  r8  rD  rI  rP  rT  rV  rY  r\  rg  ri  rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r
  r  r  r!  r%  CSVDOCDOCXEMLEPUBHTMLMDMSGODTr'  PPTPPTXRSTRTFTSVTXTXLSXXMLr1  r4  r7  r=  r@  rB  rH  fixturerE   r   r3   r0   <module>rw     s   # " " " " " 				  				    # # # # # #                    T T T T T T                           > = = = = =                        3 2 2 2 2 2 E E E E E E E E D D D D D D D D D D D D ] ] ] ] ] ] ] ] ] ]w~~m,,7 7 77 7 7 .^^^ T T T	 T* L)abb. . . cb.G G G G. . . . &Q&Q&QRR "	 3 3 3  SR3D MDEEE	E"###H/"""H$%%%	 N N N- - -W W WW W W .LLL R R R	 R" .LLL   	 M M M  .NNN . . .	 .$ .NNN . . .	 .&
- 
- 
- 
- 9E4=II
 
 
 JI
22 2 22 	N    [ [ [
& 
& 
& 
&$. . .  . . . .
. . . .7 7 77 7 7 .XXX 2 2 2	 2, .XXX 2 2 2	 2*   @	R 	R 	R 9E4=II
 
 
 JI
 	
 	
 	
"T T T,T T T" }6L&MNN "	 3 3 3  ON3D9 9 99 9 94 4 47 7 7 % #STTT3444.///899912225666
	
 
(, "T T T# "T  (( ( (   *  (B B BH H H/ / /; ; ;< < <   28 8 88 8 88 8 8d d d d   ,] ] ]
; ; ;I I I RXXZZ N N N N? ? ?Z Z ZG G G GS S SW W W W 7F8R.E7TV-9XYY	B 	B 	B ZY	BG G G G >
&.	 $'
*e$	Z	 	I 	I 	I 	I& 3   H H H H0  		 K K K K, 	X\*	x|$	&	8<(	&	8=)	hk"	8<(	x|$	(,'	.	&	x|$	&	X\*	(,'	hm,	hm,	&' 0  1 0,B B BO O OV V V(: : :' ' '
< < < <& 
 
 
 
 
r3   