
    NgO                        d Z ddlmZ ddlZddlZddlZddlZddlm	Z	m
Z
 ddlZddlmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZmZmZ  G d d	          Z G d
 d          Z G d d          Zd Z dS )z?Test-suite for `unstructured.partition.common.metadata` module.    )annotationsN)AnyCallable)	CheckBoxElementElementMetadataFigureCaptionHeaderListItemNarrativeTextTextTitle)FileType)_assign_hash_idsapply_metadataget_last_modified_dateset_element_hierarchyc                      e Zd ZddZddZdS )Describe_get_last_modified_datetmp_pathpathlib.Pathc                   t          j        dddddd                                          }|dz  }|                    d	           t	          j        |||f           t          t          |                    }|d
k    sJ d S )Ni           +   (   )yearmonthdayhourminutesecondzsome_file.txtabcdefgz2024-03-05T17:43:40)dtdatetime	timestamp
write_textosutimer   str)selfr   modified_timestamp	file_pathlast_modified_dates        l/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/common/test_metadata.py8it_gets_the_modified_time_of_a_file_identified_by_a_pathzXDescribe_get_last_modified_date.it_gets_the_modified_time_of_a_file_identified_by_a_path(   s    [QABr"
 
 

)++ 	 .	Y'''
/1CDEEE3C	NNCC!%:::::::    c                P    |dz  }t          t          |                    }|J d S )Nz!some_file_that_does_not_exist.txt)r   r+   )r,   r   r.   r/   s       r0   6but_it_returns_None_when_there_is_no_file_at_that_pathzVDescribe_get_last_modified_date.but_it_returns_None_when_there_is_no_file_at_that_path4   s3    BB	3C	NNCC!)))))r2   N)r   r   )__name__
__module____qualname__r1   r4    r2   r0   r   r   '   s<        
; 
; 
; 
;* * * * * *r2   r   c                  2    e Zd Zd Zd Zd Zd Zd Zd ZdS )Describe_set_element_hierarchyc           	        t          dd          t          dd          t          dd          t          dd	          t          d
d          t          dd          g}t          |          }|d         j        j        J |d         j        j        dk    sJ |d         j        j        J |d         j        j        dk    sJ |d         j        j        dk    sJ |d         j        j        d
k    sJ d S )N0Title0
element_idtext1Text02Header03Text14Title15Text2r         r      r   )r   r   r
   r   metadata	parent_idr,   elementsresults      r0   it_applies_default_rulesetz9Describe_set_element_hierarchy.it_applies_default_rulesetC   s   Sx000Cg...c	222Cg...Sx000Cg...
 'x00ay!+333ay!+s2222ay!+333ay!+s2222ay!+s2222ay!+s222222r2   c           
        t          ddt          d                    t          ddt          d                    t          d	d
t          d                    t          ddt          d                    g}t          |          }|d         j        j        J |d         j        j        dk    sJ |d         j        j        dk    sJ |d         j        j        dk    sJ d S )Nr<   r=   rK   category_depthr?   r@   rN   rA   	ListItem0r   rC   	ListItem1rE   	ListItem2rL   r   )r   r   r   r   rN   rO   rP   s      r0   ;it_applies_category_depth_when_element_category_is_the_samezZDescribe_set_element_hierarchy.it_applies_category_depth_when_element_category_is_the_sameV   s    Sx/YZ:[:[:[\\\+_`@a@a@abbb+_`@a@a@abbb+_`@a@a@abbb	
 'x00ay!+333ay!+s2222ay!+s2222ay!+s222222r2   c                   t          ddt          d                    t          ddt          d                    t          d	d
t          d                    t          ddt          d                    t	          ddt          d                    t          ddt          d                    g}t          |          }|d         j        j        J |d         j        j        dk    sJ |d         j        j        J |d         j        j        d	k    sJ |d         j        j        d	k    sJ |d         j        j        d	k    sJ d S )Nr<   r   rL   rU   rW   rA   r   r   rC   r
   rE   rG   r   rK   rI    r   rM   r   )	r   r   r   r
   r   r   r   rN   rO   rP   s      r0   Gbut_it_ignores_category_depth_when_elements_are_of_different_categorieszfDescribe_set_element_hierarchy.but_it_ignores_category_depth_when_elements_are_of_different_categoriese   sj   SwXY9Z9Z9Z[[[CfVW7X7X7XYYYc?Z[;\;\;\]]]CfVW7X7X7XYYY*^_?`?`?`aaaSrO[\<]<]<]^^^
 'x00ay!+333ay!+s2222ay!+333ay!+s2222ay!+s2222ay!+s222222r2   c                .   t          ddt          d                    t          dd          t          dd	          g}t          |          }|d
         j        j        dk    sJ |d         j        j        J |d         j        j        dk    sJ d S )Nr<   r   10)rO   rW   rA   r>   rC   r   r   rK   rL   )r   r   r   r   rN   rO   rP   s      r0   -it_skips_elements_with_pre_existing_parent_idzLDescribe_set_element_hierarchy.it_skips_elements_with_pre_existing_parent_idx   s    SwSW9X9X9XYYYSw///Cf---
 'x00 ay!+t3333ay!+333ay!+s222222r2   c                   t          d          t          d          t          d          t          d          t          dt	          d                    t          dt	          d                    t          d          t          d	d
          t          d          t          d          t          d          t          d          g}t          |          }|d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            |d         j        j	        |d         j
        k    s
J d            d S )Nr   r@   r   r	   r   rK   rU   r@   rN   z	some-id-1T)r?   checkedTitle 2r   r   &NarrativeText should be child of TitlerL   z&FigureCaption should be child of Titler   z!ListItem should be child of TitlerM   r         zTitle 2 should be child of None	   z#ListItem should be child of Title 2
      zText should be child of Title 2)r   r   r	   r   r   r   r   r   rN   rO   id)r,   elements_to_setrQ   s      r0   .it_sets_parent_id_for_each_element_in_elementszMDescribe_set_element_hierarchy.it_sets_parent_id_for_each_element_in_elements   s   w//////*%%%*a/P/P/PQQQ*a/P/P/PQQQ*%%%T:::y!!!*%%%*%%%f
 )99 QK *hqkn<<<3 =<< QK *hqkn<<<3 =<<{#-!???Ad???{#-!???Ad???{#-!???Ad???{#-!???Ad??? {#-557X555QK *hqkn<<<0 =<< RL!+x{~===0 >==|$.(1+.@@@Bc@@@@@r2   c                   t          d          t          d          t          d          t          d          t          d          t	          d          g}ddgg dd	}t          ||
          }|d         j        j        |d         j        k    s
J d            |d         j        j        |d         j        k    s
J d            |d         j        j        |d         j        k    s
J d            |d         j        j        |d         j        k    s
J d            |d         j        j        |d         j        k    s
J d            d S )Nr
   rc   r   r   r   rf   r	   )r   UncategorizedTextr	   )r
   r   )rQ   rulesetrK   r   zTitle should be child of HeaderrL   rg   r   zText should be child of TitlerM   z!Title 2 should be child of Headerr   z(FigureCaption should be child of Title 2)	r
   r   r   r   r	   r   rN   rO   rm   )r,   rn   custom_rule_setrQ   s       r0   it_applies_custom_rule_setz9Describe_set_element_hierarchy.it_applies_custom_rule_set   sv   !!!w///fy!!!///
 'LLL
 

 )$#
 
 

 {#-!???Ab???QK *hqkn<<<3 =<<{#-!???A`???{#-!???Ad???QK *hqkn<<<5 =<<<<r2   N)	r5   r6   r7   rS   r[   r^   ra   ro   rt   r8   r2   r0   r:   r:   A   sr        3 3 3&3 3 33 3 3&3 3 3'd 'd 'dR6 6 6 6 6r2   r:   c                     e Zd ZdZd Zd ZddZddZddZdd	Z	dd
Z
ddZddZddZddZddZddZddZej                            di ddig          dd            ZddZddZej        dd            ZdS )Describe_apply_metadatazXUnit-test suite for `unstructured.partition.common.metadata.apply_metadata()` decorator.c                F   t          dt          dd                    dfd
} t                      |          } |            }t          d |D                       t          |          k    sJ t          d |D                       t          |          k    sJ d S )Nr   foo.barrK   filenamepage_numberrd   kwargsr   returnlist[Element]c                     gS Nr8   )r|   elements    r0   fake_partitionerz~Describe_apply_metadata.it_produces_unique_elements_and_metadata_when_input_reuses_element_instances.<locals>.fake_partitioner   s    Wg..r2   c                ,    h | ]}t          |          S r8   rm   .0es     r0   	<setcomp>zwDescribe_apply_metadata.it_produces_unique_elements_and_metadata_when_input_reuses_element_instances.<locals>.<setcomp>       ,,,aBqEE,,,r2   c                6    h | ]}t          |j                  S r8   rm   rN   r   s     r0   r   zwDescribe_apply_metadata.it_produces_unique_elements_and_metadata_when_input_reuses_element_instances.<locals>.<setcomp>        555qBqzNN555r2   r|   r   r}   r~   )r   r   r   len)r,   r   	partitionrQ   r   s       @r0   Lit_produces_unique_elements_and_metadata_when_input_reuses_element_instanceszdDescribe_apply_metadata.it_produces_unique_elements_and_metadata_when_input_reuses_element_instances   s    I`a0b0b0bccc	/ 	/ 	/ 	/ 	/ 	/ %N$$%566	9;; ,,8,,,--X>>>>55H55566#h--GGGGGGr2   c                (   t          dd          dfd} t                      |          } |            }t          d	 |D                       t          |          k    sJ t          d
 |D                       t          |          k    sJ d S )Nrx   rK   ry   r|   r   r}   r~   c                 h    t          d          t          d          t          d          gS )Nfoord   barbaz)r   )r|   rN   s    r0   r   zDescribe_apply_metadata.and_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instances.<locals>.fake_partitioner   s=    %(333%(333%(333 r2   c                ,    h | ]}t          |          S r8   r   r   s     r0   r   z|Describe_apply_metadata.and_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instances.<locals>.<setcomp>   r   r2   c                6    h | ]}t          |j                  S r8   r   r   s     r0   r   z|Describe_apply_metadata.and_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instances.<locals>.<setcomp>   r   r2   r   )r   r   r   )r,   r   r   rQ   rN   s       @r0   Qand_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instancesziDescribe_apply_metadata.and_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instances   s    "I1EEE	 	 	 	 	 	 %N$$%566	9;; ,,8,,,--X>>>>55H55566#h--GGGGGGr2   r   Callable[..., list[Element]]c                    t                      |          } |            } |            }t          d |D                       sJ t          d |D                       sJ t          d t          ||          D                       sJ d S )Nc              3  F   K   | ]}t          |j                  d k    V  dS     Nr   rm   r   s     r0   	<genexpr>zkDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specified.<locals>.<genexpr>  .      55q3qt99?555555r2   c              3  (   K   | ]}d |j         vV  dS -Nr   r   s     r0   r   zkDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specified.<locals>.<genexpr>  (      55q3ad?555555r2   c              3  <   K   | ]\  }}|j         |j         k    V  d S r   r   r   r   e2s      r0   r   zkDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specified.<locals>.<genexpr>  .      HHUQ1425=HHHHHHr2   r   allzipr,   r   r   rQ   
elements_2s        r0   @it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specifiedzXDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specified   s     %N$$%566	9;;Y[[
 55H5555555555H55555555HHc(J.G.GHHHHHHHHHHr2   c                "    t                      |          } |d          } |d          }t          d |D                       sJ t          d |D                       sJ t          d t          ||          D                       sJ d S )NFunique_element_idsc              3  F   K   | ]}t          |j                  d k    V  dS r   r   r   s     r0   r   zcDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_False.<locals>.<genexpr>  r   r2   c              3  (   K   | ]}d |j         vV  dS r   r   r   s     r0   r   zcDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_False.<locals>.<genexpr>  r   r2   c              3  <   K   | ]\  }}|j         |j         k    V  d S r   r   r   s      r0   r   zcDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_False.<locals>.<genexpr>  r   r2   r   r   s        r0   8it_assigns_hash_element_ids_when_unique_ids_arg_is_FalsezPDescribe_apply_metadata.it_assigns_hash_element_ids_when_unique_ids_arg_is_False
  s     %N$$%566	9666Y%888
 55H5555555555H55555555HHc(J.G.GHHHHHHHHHHr2   c                "    t                      |          } |d          } |d          }t          d |D                       sJ t          d |D                       sJ t          d t          ||          D                       sJ d S )NTr   c              3  F   K   | ]}t          |j                  d k    V  dS $   Nr   r   s     r0   r   zaDescribe_apply_metadata.it_leaves_UUID_element_ids_when_unique_ids_arg_is_True.<locals>.<genexpr>!  r   r2   c              3  R   K   | ]"}|j                             d           dk    V  #dS )r   rM   N)rm   countr   s     r0   r   zaDescribe_apply_metadata.it_leaves_UUID_element_ids_when_unique_ids_arg_is_True.<locals>.<genexpr>"  s3      ::A14::c??a'::::::r2   c              3  <   K   | ]\  }}|j         |j         k    V  d S r   r   r   s      r0   r   zaDescribe_apply_metadata.it_leaves_UUID_element_ids_when_unique_ids_arg_is_True.<locals>.<genexpr>$  r   r2   r   r   s        r0   6it_leaves_UUID_element_ids_when_unique_ids_arg_is_TruezNDescribe_apply_metadata.it_leaves_UUID_element_ids_when_unique_ids_arg_is_True  s     %N$$%566	9555Y$777
 55H55555555::::::::::HHc(J.G.GHHHHHHHHHHr2   c                     t                      |          } |            }|d         }|j        j        dk    sJ |d         }|j        j        |j        k    sJ d S )Nr   rK   )r   rN   rV   rO   rm   )r,   r   r   rQ   title	narr_texts         r0   !it_computes_and_assigns_parent_idz9Describe_apply_metadata.it_computes_and_assigns_parent_id(  sj    $N$$%566	9;;~,1111QK	!+ux777777r2   c                     t                      |          } |dgd          }t          d |D                       sJ d S )NautoT)	languagesdetect_language_per_elementc              3  8   K   | ]}|j         j        d gk    V  dS )engN)rN   r   r   s     r0   r   zGDescribe_apply_metadata.it_applies_language_metadata.<locals>.<genexpr>9  s.      EEq1:'E72EEEEEEr2   r   r   r,   r   r   rQ   s       r0   it_applies_language_metadataz4Describe_apply_metadata.it_applies_language_metadata4  sW    $N$$%566	9xTRRREEHEEEEEEEEEEr2   c                     t          t          j                  |          } |t          j                  }t	          d |D                       sJ dS )a  A `metadata_file_type` arg overrides the file-type specified in the decorator.

        This is used for example by a delegating partitioner to preserve the original file-type in
        the metadata, like EPUB instead of the HTML that partitioner converts the .epub file to.
        	file_type)metadata_file_typec              3  6   K   | ]}|j         j        d k    V  dS )z'application/vnd.oasis.opendocument.textNrN   filetyper   s     r0   r   zmDescribe_apply_metadata.it_assigns_the_value_of_a_metadata_file_type_arg_when_there_is_one.<locals>.<genexpr>I  s>       
 
QRAJ#LL
 
 
 
 
 
r2   N)r   r   DOCXODTr   r   s       r0   Bit_assigns_the_value_of_a_metadata_file_type_arg_when_there_is_onezZDescribe_apply_metadata.it_assigns_the_value_of_a_metadata_file_type_arg_when_there_is_one=  sw     <NX];;;<LMM	9=== 
 
V^
 
 
 
 
 	
 	
 	
 	
 	
r2   c                     t          t          j                  |          } |            }dt          fd|D                       sJ dS )zThe `file_type=...` decorator arg is the "normal" way to specify the file-type.

        This is used for principal (non-delegating) partitioners.
        r   zGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentc              3  8   K   | ]}|j         j        k    V  d S r   r   )r   r   DOCX_MIME_TYPEs     r0   r   z|Describe_apply_metadata.and_it_assigns_the_decorator_file_type_when_the_metadata_file_type_arg_is_omitted.<locals>.<genexpr>Y  s-      KKQ1:&.8KKKKKKr2   N)r   r   r   r   )r,   r   r   rQ   r   s       @r0   Qand_it_assigns_the_decorator_file_type_when_the_metadata_file_type_arg_is_omittedziDescribe_apply_metadata.and_it_assigns_the_decorator_file_type_when_the_metadata_file_type_arg_is_omittedM  sa     <NX];;;<LMM	9;;bKKKK(KKKKKKKKKKr2   c                ~     t                      |          } |            }t          d |D                       sJ dS )zA partitioner can elect to assign `.metadata.filetype` for itself.

        This is done in `partition_image()` for example where the same partitioner is used for
        multiple file-types.
        c              3  6   K   | ]}|j         j        d k    V  dS )
image/jpegNr   r   s     r0   r   zjDescribe_apply_metadata.and_it_does_not_assign_file_type_metadata_when_both_are_omitted.<locals>.<genexpr>g  ,      II11:&,6IIIIIIr2   Nr   r   s       r0   ?and_it_does_not_assign_file_type_metadata_when_both_are_omittedzWDescribe_apply_metadata.and_it_does_not_assign_file_type_metadata_when_both_are_omitted[  sN     %N$$%566	9;;IIIIIIIIIIIIr2   c                     t                      |          } |d          }t          d |D                       sJ t          d |D                       sJ dS )z6A `metadata_filename` arg overrides all other sources.	a/b/c.xyz)metadata_filenamec              3  6   K   | ]}|j         j        d k    V  dS zc.xyzNrN   rz   r   s     r0   r   z[Describe_apply_metadata.it_uses_metadata_filename_arg_value_when_present.<locals>.<genexpr>s  ,      DDa1:&'1DDDDDDr2   c              3  6   K   | ]}|j         j        d k    V  dS za/bNrN   file_directoryr   s     r0   r   z[Describe_apply_metadata.it_uses_metadata_filename_arg_value_when_present.<locals>.<genexpr>t  ,      HH!1:,5HHHHHHr2   Nr   r   s       r0   0it_uses_metadata_filename_arg_value_when_presentzHDescribe_apply_metadata.it_uses_metadata_filename_arg_value_when_presentk  sx     %N$$%566	9{;;;DD8DDDDDDDDHHxHHHHHHHHHHr2   c                     t                      |          } |d          }t          d |D                       sJ t          d |D                       sJ d S )Nr   )rz   c              3  6   K   | ]}|j         j        d k    V  dS r   r   r   s     r0   r   zpDescribe_apply_metadata.and_it_uses_filename_arg_value_when_metadata_filename_arg_not_present.<locals>.<genexpr>}  r   r2   c              3  6   K   | ]}|j         j        d k    V  dS r   r   r   s     r0   r   zpDescribe_apply_metadata.and_it_uses_filename_arg_value_when_metadata_filename_arg_not_present.<locals>.<genexpr>~  r   r2   r   r   s       r0   Eand_it_uses_filename_arg_value_when_metadata_filename_arg_not_presentz]Describe_apply_metadata.and_it_uses_filename_arg_value_when_metadata_filename_arg_not_presentv  sx     %N$$%566	9k222DD8DDDDDDDDHHxHHHHHHHHHHr2   c                     t                      |          } |            }t          d |D                       sJ t          d |D                       sJ d S )Nc              3  6   K   | ]}|j         j        d k    V  dS )
image.jpegNr   r   s     r0   r   zlDescribe_apply_metadata.and_it_does_not_assign_filename_metadata_when_neither_are_present.<locals>.<genexpr>  r   r2   c              3  6   K   | ]}|j         j        d k    V  dS )
x/y/imagesNr   r   s     r0   r   zlDescribe_apply_metadata.and_it_does_not_assign_filename_metadata_when_neither_are_present.<locals>.<genexpr>  s,      OO1:,<OOOOOOr2   r   r   s       r0   Aand_it_does_not_assign_filename_metadata_when_neither_are_presentzYDescribe_apply_metadata.and_it_does_not_assign_filename_metadata_when_neither_are_present  sq     %N$$%566	9;;IIIIIIIIIIOOhOOOOOOOOOOr2   c                     t                      |          }d |          }t          fd|D                       sJ dS )z;A `metadata_last_modified` arg overrides all other sources.z2024-09-26T15:17:53)metadata_last_modifiedc              3  8   K   | ]}|j         j        k    V  d S r   rN   last_modified)r   r   r   s     r0   r   z`Describe_apply_metadata.it_uses_metadata_last_modified_arg_value_when_present.<locals>.<genexpr>  s.      XX!1:+/EEXXXXXXr2   Nr   )r,   r   r   rQ   r   s       @r0   5it_uses_metadata_last_modified_arg_value_when_presentzMDescribe_apply_metadata.it_uses_metadata_last_modified_arg_value_when_present  sc     %N$$%566	!694JKKKXXXXxXXXXXXXXXXr2   r|   r   Ndict[str, Any]c                z     t                      |          } |di |}t          d |D                       sJ d S )Nc              3  6   K   | ]}|j         j        d k    V  dS )2020-01-06T05:07:03Nr   r   s     r0   r   z~Describe_apply_metadata.but_it_does_not_update_last_modified_when_metadata_last_modified_arg_absent_or_None.<locals>.<genexpr>  s-      WW1:+/DDWWWWWWr2   r8   r   )r,   r|   r   r   rQ   s        r0   Sbut_it_does_not_update_last_modified_when_metadata_last_modified_arg_absent_or_NonezkDescribe_apply_metadata.but_it_does_not_update_last_modified_when_metadata_last_modified_arg_absent_or_None  sX     %N$$%566	9&&v&&WWhWWWWWWWWWWr2   c                     t                      |          } |d          }t          d |D                       sJ d S )Nhttps://adobe.com/stock/54321)urlc              3  6   K   | ]}|j         j        d k    V  dS )r  NrN   r  r   s     r0   r   z`Describe_apply_metadata.it_assigns_url_metadata_field_when_url_arg_is_present.<locals>.<genexpr>  s,      WW1:>%DDWWWWWWr2   r   r   s       r0   5it_assigns_url_metadata_field_when_url_arg_is_presentzMDescribe_apply_metadata.it_assigns_url_metadata_field_when_url_arg_is_present  sV     %N$$%566	9!@AAAWWhWWWWWWWWWWr2   c                ~     t                      |          } |            }t          d |D                       sJ d S )Nc              3  6   K   | ]}|j         j        d k    V  dS )http://images.comNr  r   s     r0   r   zjDescribe_apply_metadata.and_it_does_not_assign_url_metadata_when_url_arg_is_not_present.<locals>.<genexpr>  s,      KKQ1:>%88KKKKKKr2   r   r   s       r0   ?and_it_does_not_assign_url_metadata_when_url_arg_is_not_presentzWDescribe_apply_metadata.and_it_does_not_assign_url_metadata_when_url_arg_is_not_present  sN     %N$$%566	9;;KK(KKKKKKKKKKr2   r}   c                    dd}|S )Nr|   r   r}   r~   c                 N   t          d          }d|j        _        d|j        _        d|j        _        d|j        _        d|j        _        d|j        _        t          d          }d|j        _        d|j        _        d|j        _        d|j        _        d|j        _        ||gS )	NIntroductionrK   r   r   r   r   r  z0To understand bar you must first understand foo.)	r   rN   rV   r   rz   r   r   r  r   )r|   r   r   s      r0   r   zBDescribe_apply_metadata.fake_partitioner.<locals>.fake_partitioner  s    .))E,-EN),8EN)&2EN#&2EN#+@EN(!4EN%&XYYI0<I-*6I'*6I'/DI,%8I"9%%r2   r   r8   )r,   r   s     r0   r   z(Describe_apply_metadata.fake_partitioner  s    	& 	& 	& 	&$  r2   )r   r   )r|   r   r   r   )r}   r   )r5   r6   r7   __doc__r   r   r   r   r   r   r   r   r   r   r   r   r   r   pytestmarkparametrizer   r  r	  fixturer   r8   r2   r0   rv   rv      s       bbH H HH H H*I I I II I I II I I I 8 8 8 8F F F F
 
 
 
 L L L LJ J J J 	I 	I 	I 	II I I IP P P P	Y 	Y 	Y 	Y [X-Et,L'MNNX X X ONXX X X XL L L L ^      ^     r2   rv   c            	     |   t          dt          dd                    t          dt          dd                    t          dt          dd                    g} t          d | D                       sJ t          t	          j        |                     } t          t	          j        |                     }d | D             }t          d | D                       sJ t          |          t          t          |                    k    sJ t          d	 t          | |          D                       sJ d S )
Nr   rx   rK   ry   rd   c              3  F   K   | ]}t          |j                  d k    V  dS r   r   r   s     r0   r   zntest_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elements.<locals>.<genexpr>  .      111s14yyB111111r2   c                    g | ]	}|j         
S r8   r   r   s     r0   
<listcomp>zotest_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elements.<locals>.<listcomp>  s    
"
"
"A14
"
"
"r2   c              3  F   K   | ]}t          |j                  d k    V  dS r   r   r   s     r0   r   zntest_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elements.<locals>.<genexpr>  r  r2   c              3  <   K   | ]\  }}|j         |j         k    V  d S r   r   r   s      r0   r   zntest_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elements.<locals>.<genexpr>  s.      DDBqtru}DDDDDDr2   )	r   r   r   r   copydeepcopyr   setr   )rQ   r   idss      r0   [test_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elementsr    sB   )oyVW&X&X&XYYY)oyVW&X&X&XYYY)oyVW&X&X&XYYYH 1111111111h 7 788H!$-"9"9::J
"
"
"
"
"C1111111111s88s3s88}}$$$$DD#h
*C*CDDDDDDDDDDr2   )!r  
__future__r   r  r&   r%   r)   pathlibtypingr   r   r  unstructured.documents.elementsr   r   r   r	   r
   r   r   r   r   unstructured.file_utils.modelr   &unstructured.partition.common.metadatar   r   r   r   r   r:   rv   r  r8   r2   r0   <module>r$     s   E E # " " " " "      				                  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 3 2 2 2 2 2           * * * * * * * *4J6 J6 J6 J6 J6 J6 J6 J6dx  x  x  x  x  x  x  x @E E E E Er2   