
    NggB                       d Z ddlmZ ddlZddlmZ ddlZddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZmZ  ed           ed           ed           ed          gZd Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d/dZ)d Z*d Z+d Z,d  Z-d! Z.d" Z/d# Z0d$ Z1d% Z2d0d(Z3d) Z4d* Z5d+ Z6d, Z7 G d- d.          Z8dS )1z3Test suite for `unstructured.partition.msg` module.    )annotationsN)Any)Message)FixtureRequestLogCaptureFixtureMockassert_round_trips_through_JSONexample_doc_pathfunction_mockproperty_mock)chunk_by_title)ElementMetadataListItemNarrativeTextTextTitle)UnsupportedFileFormatError)MsgPartitionerOptionspartition_msg+This is a test email to use for unit tests.textzImportant points:zRoses are redzViolets are bluec                 &   t          d          } t          |           }|d         j        j        }|t          k    sJ |d         j                                        t          d | dd d dgdgdd|d	g
                                          k    sJ d S )Nfake-email.msgfilenamer   2023-03-28T17:00:31+00:00z."Matthew Robinson" <mrobinson@unstructured.io>zmrobinson@unstructured.ioz
Test Emailapplication/vnd.ms-outlookeng)coordinatesr   last_modifiedpage_numberurl	sent_fromsent_tosubjectfiletype	parent_id	languages)r
   r   metadatar(   EXPECTED_MSG_OUTPUTto_dictr   )r   elementsr(   s      `/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/test_msg.py test_partition_msg_from_filenamer/   '   s     011Hh///H$.I*****$$&&5GH01 1g
 
 
 '))	 	 	 	 	 	    c                 ~    t          d          } t          |           }t          |d         t                    sJ d S )Nr   r   r   )r
   r   
isinstancer   r   r-   s     r.   5test_partition_msg_from_filename_returns_uns_elementsr4   ?   s@     011Hh///Hhqk=1111111r0   c                 |    t          d          } t          | d          }t          d |D                       sJ d S )Nr   test)r   metadata_filenamec              3  6   K   | ]}|j         j        d k    V  dS )r6   Nr*   r   .0elements     r.   	<genexpr>zJtest_partition_msg_from_filename_with_metadata_filename.<locals>.<genexpr>H   s-      KKww(F2KKKKKKr0   )r
   r   allr3   s     r.   7test_partition_msg_from_filename_with_metadata_filenamer?   E   sK     011Hh&IIIHKK(KKKKKKKKKKr0   c                     t          d          } t          |           }t          |d                   dk    sJ |d         j        j        dk    sJ |d         j        j        t          d          k    sJ d S )Nr   r   r   r    )r
   r   strr*   r   file_directoryr3   s     r.   2test_partition_msg_from_filename_with_text_contentrD   K   s     011Hh///Hx{LLLLLA;(,<<<<<A;.2B22F2FFFFFFFr0   c                     t          d          } t          j        t                    5  t	          |            d d d            d S # 1 swxY w Y   d S )Nzdoesnt-exist.msgr   )r
   pytestraisesFileNotFoundErrorr   r   s    r.   +test_partition_msg_raises_with_missing_filerI   U   s     233H	(	)	) ) )x(((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )s   AAAc                     t          d          } t          | d          5 }t          |          }d d d            n# 1 swxY w Y   |t          k    sJ |D ]}|j        j        J d S )Nr   rbfiler
   openr   r+   r*   r   r   fr-   r<   s       r.   test_partition_msg_from_filerR   [   s     011H	h		 ) a((() ) ) ) ) ) ) ) ) ) ) ) ) ) )***** 1 1(00001 1s   =AAc                     t          d          } t          | d          5 }t          |d          }d d d            n# 1 swxY w Y   |t          k    sJ |D ]}|j        j        dk    sJ d S )Nr   rK   r6   rM   r7   rN   rP   s       r.   3test_partition_msg_from_file_with_metadata_filenamerU   d   s     011H	h		 C a6BBBC C C C C C C C C C C C C C C***** 3 3(F222223 3s   >AAc                     t          t          d          t          j        d                    } | t          k    sJ d S )Nr   s   abcderL   )r   r
   ioBytesIOr+   r-   s    r.   9test_partition_msg_uses_file_path_when_both_are_specifiedrZ   m   sA    -.>??bjQYFZFZ[[[H*******r0   c                     t          j        t                    5  t                       d d d            d S # 1 swxY w Y   d S N)rF   rG   
ValueErrorr    r0   r.   &test_partition_msg_raises_with_neitherr_   r   s    	z	"	"                   s   6::c                    t          t          d          d          } t          d | d d         D                       sJ t          d | dd         D                       sJ t          d | dd	         D                       sJ t          d
 | d	d          D                       sJ d | d d         D             g dk    sJ d | D             d d         g dk    sJ d | D             dd          g dk    sJ d S )N#fake-email-multiple-attachments.msgTprocess_attachmentsc              3  6   K   | ]}|j         j        d k    V  dS )ra   Nr9   r;   es     r.   r=   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s.      bbPQqz"&KKbbbbbbr0      c              3  6   K   | ]}|j         j        d k    V  dS )zunstructured_logo.pngNr9   re   s     r.   r=   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s-      UU!qz"&==UUUUUUr0      c              3  6   K   | ]}|j         j        d k    V  dS )zdense_doc.pdfNr9   re   s     r.   r=   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s,      OO!qz"o5OOOOOOr0   iW  c              3  6   K   | ]}|j         j        d k    V  dS )zEngineering Onboarding.pptxNr9   re   s     r.   r=   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s-      \\qz"&CC\\\\\\r0   c                    g | ]	}|j         
S r^   r   re   s     r.   
<listcomp>z>test_partition_msg_can_process_attachments.<locals>.<listcomp>   s    )))qAF)))r0   )Here are those documents.--Mallori HarrellUnstructured TechnologiesData Scientistc                6    g | ]}t          |          j        S r^   type__name__re   s     r.   rm   z>test_partition_msg_can_process_attachments.<locals>.<listcomp>   !    ///DGG///r0   
   )
r   r   r   r   r   Imager   r   r   r   c                6    g | ]}t          |          j        S r^   rt   re   s     r.   rm   z>test_partition_msg_can_process_attachments.<locals>.<listcomp>   rw   r0   i)
r   r   r   r   r   r   r   r   r   r   r   r
   r>   rY   s    r.   *test_partition_msg_can_process_attachmentsr|   z   s   >??UY  H bbU]^`_`^`UabbbbbbbbUUxPQRSPS}UUUUUUUUOOx#OOOOOOOO\\XVYVZVZ^\\\\\\\\))HRaRL))) . . .     0/h///4 9 9 9     0/h///5 : : :      r0   requestr   c                   t          | dt                                 t          t          d          d          }|t	          d          t          d          t          d          t          d	          t          d
          gk    sJ d S )Nz%unstructured.partition.auto.partition)side_effectra   Trb   rn   ro   rp   rq   rr   )r   r   r   r
   r   r   r   )r}   r-   s     r.   Atest_partition_msg_silently_skips_attachments_it_cannot_partitionr      s    8F`FbFb    >??UY  H
 122T

  )**      r0   c                     t          t          d                    } t          d | D                       sJ t          d | D                       sJ d S )Nr   c              3  6   K   | ]}|j         j        d k    V  dS )r   Nr9   re   s     r.   r=   zYtest_partition_msg_from_filename_gets_filename_metadata_from_file_path.<locals>.<genexpr>   s-      II1qz"&66IIIIIIr0   c              3  P   K   | ]!}|j         j        t          d           k    V  "dS )rA   N)r*   rC   r
   re   s     r.   r=   zYtest_partition_msg_from_filename_gets_filename_metadata_from_file_path.<locals>.<genexpr>   s5      SSQqz(,<R,@,@@SSSSSSr0   r{   rY   s    r.   Ftest_partition_msg_from_filename_gets_filename_metadata_from_file_pathr      se    -.>??@@HIIIIIIIIIISS(SSSSSSSSSSr0   c                     t          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   t          d |D                       sJ t          d |D                       sJ d S )Nr   rK   rL   c              3  2   K   | ]}|j         j        d u V  d S r\   r9   re   s     r.   r=   zKtest_partition_msg_from_file_gets_filename_metadata_None.<locals>.<genexpr>   s,      ==qqz"d*======r0   c              3  2   K   | ]}|j         j        d u V  d S r\   r*   rC   re   s     r.   r=   zKtest_partition_msg_from_file_gets_filename_metadata_None.<locals>.<genexpr>   s,      CCQqz(D0CCCCCCr0   rO   r
   r   r>   rQ   r-   s     r.   8test_partition_msg_from_file_gets_filename_metadata_Noner      s    	/00$	7	7 )1 a((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ==H========CC(CCCCCCCCCCs   ;??c                     t          t          d          d          } t          d | D                       sJ t          d | D                       sJ d S )Nr   	a/b/c.msg)r7   c              3  6   K   | ]}|j         j        d k    V  dS )zc.msgNr9   re   s     r.   r=   zMtest_partition_msg_from_filename_prefers_metadata_filename.<locals>.<genexpr>   ,      @@!qz"g-@@@@@@r0   c              3  6   K   | ]}|j         j        d k    V  dS )za/bNr   re   s     r.   r=   zMtest_partition_msg_from_filename_prefers_metadata_filename.<locals>.<genexpr>   ,      DDaqz(E1DDDDDDr0   r{   rY   s    r.   :test_partition_msg_from_filename_prefers_metadata_filenamer      sk    -.>??S^___H@@x@@@@@@@@DD8DDDDDDDDDDr0   c                     t          t          d          d          5 } t          | d          }d d d            n# 1 swxY w Y   t          d |D                       sJ t          d |D                       sJ d S )Nr   rK   z	d/e/f.msgrT   c              3  6   K   | ]}|j         j        d k    V  dS )zf.msgNr9   re   s     r.   r=   zItest_partition_msg_from_file_prefers_metadata_filename.<locals>.<genexpr>   r   r0   c              3  6   K   | ]}|j         j        d k    V  dS )zd/eNr   re   s     r.   r=   zItest_partition_msg_from_file_prefers_metadata_filename.<locals>.<genexpr>   r   r0   r   r   s     r.   6test_partition_msg_from_file_prefers_metadata_filenamer      s    	/00$	7	7 H1 a;GGGH H H H H H H H H H H H H H H @@x@@@@@@@@DD8DDDDDDDDDDs   <A A c                     dt          t          d                    } t          fd| D                       s-J d dt          | d         j        j                               d S )Nr   r   c              3  8   K   | ]}|j         j        k    V  d S r\   )r*   r'   )r;   rf   MSG_MIME_TYPEs     r.   r=   zQtest_partition_msg_gets_the_MSG_mime_type_in_metadata_filetype.<locals>.<genexpr>   s-      FFqz"m3FFFFFFr0   zExpected all elements to have 'z' as their filetype, but got: r   )r   r
   r>   reprr*   r'   )r-   r   s    @r.   >test_partition_msg_gets_the_MSG_mime_type_in_metadata_filetyper      s    0M-.>??@@HFFFFXFFFFF  	2- 	2 	2!%.//	2 	2    r0   c                 t    t          t          d                    } t          d | D                       sJ d S )Nr   c              3  6   K   | ]}|j         j        d k    V  dS )r   Nr*   r!   re   s     r.   r=   zPtest_partition_msg_pulls_last_modified_from_message_sent_date.<locals>.<genexpr>   s-      YY1qz'+FFYYYYYYr0   r{   rY   s    r.   =test_partition_msg_pulls_last_modified_from_message_sent_dater      sC    -.>??@@HYYPXYYYYYYYYYYr0   c                 v    d} t          t          d          |           }|d         j        j        | k    sJ d S )N2020-07-05T09:24:28r   )metadata_last_modifiedr   )r   r
   r*   r!   )r   r-   s     r.   @test_partition_msg_from_file_path_prefers_metadata_last_modifiedr      sO    2)**CY  H A;-1GGGGGGGr0   c                     dt          t          d          d          5 } t          |           }d d d            n# 1 swxY w Y   t          fd|D                       sJ d S )Nr   r   rK   )rM   r   c              3  8   K   | ]}|j         j        k    V  d S r\   r   )r;   rf   r   s     r.   r=   zNtest_partition_msg_from_file_prefers_metadata_last_modified.<locals>.<genexpr>   s.      TTaqz'+AATTTTTTr0   r   )rQ   r-   r   s     @r.   ;test_partition_msg_from_file_prefers_metadata_last_modifiedr      s    2	/00$	7	7 X1 a@VWWWX X X X X X X X X X X X X X X TTTT8TTTTTTTTTTs   ?AAc                 \    t          t          d                    } t          |            d S )Nr   )r   r
   r	   rY   s    r.   test_partition_msg_with_jsonr     s-    -.>??@@H#H-----r0   caplogr   c                z    t          t          d                    }|g k    sJ d| j        v sJ d| j        v sJ d S )Nfake-encrypted.msgWARNINGzEncrypted email detected)r   r
   r   )r   r-   s     r.   -test_partition_msg_with_pgp_encrypted_messager   	  sR    -.BCCDDHr>>>>####%444444r0   c                     t          d          } t          |           }t          | d          }t          |          }||k    sJ ||k    sJ d S )Nr   r   by_title)chunking_strategy)r
   r   r   )r   r-   chunk_elementschunkss       r.   4test_add_chunking_strategy_by_title_on_partition_msgr     sg     011Hh///H"8zJJJNH%%FX%%%%V######r0   c                 \    d} t          |           }|d         j        j        dgk    sJ d S )Nexample-docs/fake-email.msgr   r   r   )r   r*   r)   r3   s     r.   1test_partition_msg_element_metadata_has_languagesr     s:    ,Hh///HA;)eW444444r0   c                 d    d} t          | dg          }t          d |D                       sJ d S )Nr   deur   r)   c              3  8   K   | ]}|j         j        d gk    V  dS )r   N)r*   r)   r:   s     r.   r=   z<test_partition_msg_respects_languages_arg.<locals>.<genexpr>(  s/      MMw)eW4MMMMMMr0   )r   r>   r3   s     r.   )test_partition_msg_respects_languages_argr   %  sD    ,Hh5'BBBHMMHMMMMMMMMMMr0   c                     t          j        t                    5  d} t          | d           d d d            d S # 1 swxY w Y   d S )Nr   r   r   )rF   rG   	TypeErrorr   r   s    r.   9test_partition_msg_raises_TypeError_for_invalid_languagesr   +  s    	y	!	! : :0x59999: : : : : : : : : : : : : : : : : :s   ;??c                     e Zd ZdZd(dZej                            dddg          d)d            Zd(dZ	d(dZ
d(dZd(dZd(dZej                            dddg          d*d            Zd(dZd(dZd(dZej                            dddg          d+d             Zd(d!Zej        d,d$            Zej        d,d%            Zej        d-d'            ZdS ).DescribeMsgPartitionerOptionszOUnit-test suite for `unstructured.partition.msg.MsgPartitionerOptions` objects.	opts_argsdict[str, Any]c                    t          d          |d<   t          d	i |}|j        }|j        dgk    sJ |j        dgk    sJ |j        dk    sJ |j        dgk    sJ |j        g dk    sJ |j        dk    sJ d S )
Nzfake-email-with-cc-and-bcc.msg	file_pathhello@unstructured.iosteve@unstructured.ioz.14DDEF33-2BA7-4CDD-A4D8-E7C5873B37F2@gmail.comz""John" <johnjennings702@gmail.com>)zjohn-ctr@unstructured.ior   r   z%Fake email with cc and bcc recipientsr^   )	r
   r   extra_msg_metadatabcc_recipientcc_recipientemail_message_idr$   r%   r&   )selfr   optsms       r.   :it_provides_email_specific_metadata_to_add_to_each_elementzXDescribeMsgPartitionerOptions.it_provides_email_specific_metadata_to_add_to_each_element>  s    !12R!S!S	+$11y11##:";;;;;~"9!:::::!%UUUUU{CDDDDDy 
 
 
 
 
 
 

 yCCCCCCCr0   )	file_nameexpected_value)r   T)r   Fr   rB   r   boolc                X    t          |          |d<   t          di |}|j        |u sJ d S )Nr   r^   )r
   r   is_encrypted)r   r   r   r   r   s        r.   "it_knows_when_the_msg_is_encryptedz@DescribeMsgPartitionerOptions.it_knows_when_the_msg_is_encryptedP  sC     "2)!<!<	+$11y11 N222222r0   c                L    d|d<   d|d<   t          di |}|j        dk    sJ d S )Nz	x/y/z.msgr   r   metadata_file_pathr^   r   r   r   r   r   s      r.   0it_uses_the_metadata_file_path_arg_when_providedzNDescribeMsgPartitionerOptions.it_uses_the_metadata_file_path_arg_when_provided]  sD    !,	+*5	&'$11y11&+555555r0   c                `    t          d          }||d<   t          di |}|j        |k    sJ d S Nr   r   r^   )r
   r   r   )r   r   r   r   s       r.   8and_it_falls_back_to_the_MSG_file_path_arg_when_providedzVDescribeMsgPartitionerOptions.and_it_falls_back_to_the_MSG_file_path_arg_when_providedd  sG    $%566	!*	+$11y11&)333333r0   c                0    t          di |}|j        J d S )Nr^   r   r   s      r.   2but_it_returns_None_when_neither_path_is_availablezPDescribeMsgPartitionerOptions.but_it_returns_None_when_neither_path_is_availablek  s*    $11y11&.....r0   c                j    d}||d<   t          d          |d<   t          di |}|j        |k    sJ d S )Nz2024-03-05T17:02:53r   r   r   r^   r
   r   r   )r   r   r   r   s       r.   :it_uses_metadata_last_modified_when_provided_by_the_callerzXDescribeMsgPartitionerOptions.it_uses_metadata_last_modified_when_provided_by_the_callerr  sT    !6.D	*+!12B!C!C	+$11y11*.DDDDDDDr0   c                \    t          d          |d<   t          di |}|j        dk    sJ d S )Nr   r   r   r^   r   r   s      r.   Oand_it_uses_the_message_Date_header_when_metadata_last_modified_is_not_providedzmDescribeMsgPartitionerOptions.and_it_uses_the_message_Date_header_when_metadata_last_modified_is_not_providedz  sE     "22B!C!C	+$11y11*.IIIIIIIr0   filesystem_last_modifiedz2024-06-03T20:12:53N
str | NoneMessage_sent_date_r   _last_modified_prop_c                x    d |_         ||_         t          d          |d<   t          di |}|j        |k    sJ d S r   )return_valuer
   r   r   )r   r   r   r   r   r   s         r.   Yand_it_uses_the_last_modified_date_from_the_source_file_when_the_message_has_no_sent_datezwDescribeMsgPartitionerOptions.and_it_uses_the_last_modified_date_from_the_source_file_when_the_message_has_no_sent_date  sW     +/',D)!12B!C!C	+$11y11*.FFFFFFFr0   c                z    t          d          |d<   t          di |}t          |j        t                    sJ d S r   )r
   r   r2   msgr   r   s      r.   8it_loads_the_msg_document_from_a_file_path_when_providedzVDescribeMsgPartitionerOptions.it_loads_the_msg_document_from_a_file_path_when_provided  sF    !12B!C!C	+$11y11$(G,,,,,,,r0   c                   t          t          d          d          5 }t          j        |                                          |d<   d d d            n# 1 swxY w Y   t          di |}t          |j        t                    sJ d S )Nr   rK   rM   r^   )	rO   r
   rW   rX   readr   r2   r   r   )r   r   rQ   r   s       r.   Cand_it_loads_the_msg_document_from_a_file_like_object_when_providedzaDescribeMsgPartitionerOptions.and_it_loads_the_msg_document_from_a_file_like_object_when_provided  s     "#344d;; 	5q "
16688 4 4If	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5$11y11$(G,,,,,,,s   *AAAc                    t          j        t          d          5  t          di |j         d d d            d S # 1 swxY w Y   d S )Nz2one of `file` or `filename` arguments must be prov)matchr^   )rF   rG   r]   r   r   )r   r   s     r.   &but_it_raises_when_neither_is_providedzDDescribeMsgPartitionerOptions.but_it_raises_when_neither_is_provided  s    ]:-abbb 	3 	3!..I..22	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3s   ;??partition_attachmentsTFc                b    t          d          |d<   ||d<   t          di |}|j        |u sJ d S )Nr   r   r   r^   )r
   r   r   )r   r   r   r   s       r.   7it_knows_whether_attachments_should_also_be_partitionedzUDescribeMsgPartitionerOptions.it_knows_whether_attachments_should_also_be_partitioned  sP     "22B!C!C	+-B	)*$11y11)-BBBBBBBr0   c                N    ddd|d<   t          di |}|j        dddk    sJ d S )N*   baz)foobarkwargsr^   )r   partitioning_kwargsr   s      r.   Oit_provides_access_to_pass_through_kwargs_collected_by_the_partitioner_functionzmDescribeMsgPartitionerOptions.it_provides_access_to_pass_through_kwargs_collected_by_the_partitioner_function  sK     ')77	($11y11'2e+D+DDDDDDDr0   r}   r   c                .    t          |t          d          S )N_last_modified)r   r   r   r}   s     r.   r   z2DescribeMsgPartitionerOptions._last_modified_prop_  s    W&;=MNNNr0   c                .    t          |t          d          S )N	sent_date)r   r   r  s     r.   r   z0DescribeMsgPartitionerOptions.Message_sent_date_  s    Wg{;;;r0   returnc                    dddddi dS )zAll default arguments for `MsgPartitionerOptions`.

        Individual argument values can be changed to suit each test. Makes construction of opts more
        compact for testing purposes.
        NF)rM   r   r   r   r   r   r^   )r   s    r.   r   z'DescribeMsgPartitionerOptions.opts_args  s%     "&&*%*
 
 	
r0   )r   r   )r   rB   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r}   r   )r  r   )rv   
__module____qualname____doc__r   rF   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   fixturer   r   r   r^   r0   r.   r   r   9  s       YYD D D D$ ['*FHa)b 3 3 3 36 6 6 64 4 4 4/ / / /E E E EJ J J J [7:OQU9VWWG G G XWG - - - -- - - -3 3 3 3 [4tUmDDC C C EDCE E E E ^O O O ^O ^< < < ^< ^
 
 
 ^
 
 
r0   r   r  )r   r   )9r	  
__future__r   rW   typingr   rF   oxmsgr   test_unstructured.unit_utilsr   r   r   r	   r
   r   r   unstructured.chunking.titler   unstructured.documents.elementsr   r   r   r   r   unstructured.partition.commonr   unstructured.partition.msgr   r   r+   r/   r4   r?   rD   rI   rR   rU   rZ   r_   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r^   r0   r.   <module>r     s]   9 9 " " " " " " 				                               7 6 6 6 6 6              E D D D D D K K K K K K K K MDEEE	E"###H/"""H$%%%	   02 2 2L L LG G G) ) )1 1 13 3 3+ + +
  ' ' 'T   0T T TD D DE E EE E E  Z Z Z
H H HU U U. . .
5 5 5 5$ $ $5 5 5N N N: : :[
 [
 [
 [
 [
 [
 [
 [
 [
 [
r0   