
    Ngo              
      ^   d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	Z	ddl
mZmZ ddlmZmZ ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ dd	lm Z  e	j!        "                    d
 e             ed           e            g          d8d            Z#d Z$e	j!        "                    d
 e             ed           edd           e            g          d8d            Z%d Z&d Z'd Z(d Z)e	j!        "                    ddej*        ej*        dfdej*        ej+        dfdej+        ej*        dfdej+        ej+        dfg          d9d             Z,d! Z-d" Z.d# Z/d$ Z0d% Z1d& Z2 G d' d(          Z3d) Z4d* Z5d+ Z6e	j!        "                    d,g d-          d:d6            Z7d7 Z8dS );z8Test-suite for `unstructured.documents.elements` module.    )annotationsN)partial)assign_hash_idsexample_doc_path)clean_bulletsclean_prefix)CoordinateSystemOrientationRelativeCoordinateSystem)
CheckBoxConsolidationStrategyCoordinatesMetadataDataSourceMetadataElementElementMetadataPointsTextTitleassign_and_map_hash_ids)partition_jsonelement textr   c                V   t          | j        t                    sJ t          | j                  dk    sJ | j                            d          dk    sJ d}|                     d          |k    sJ | j        |k    sJ |                     d          |k    sJ | j        |k    sJ d S )N$   -    5336294a19f32ff03ef80066fbc3e0f7r   )
isinstanceidstrlencount
id_to_hash)r   expected_hashs     e/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/documents/test_elements.pyQtest_Element_autoassigns_a_UUID_then_becomes_an_idempotent_and_deterministic_hashr(   %   s    
 gj#&&&&&wz??b    :C  A%%%%6Ma  M1111:&&&& a  M1111:&&&&&&    c                 p    t          j        t          dd                                                      d S )Nzhello there!r   
element_id)jsondumpsr   to_dict r)   r'   test_Text_is_JSON_serializabler1   8   s1    JtD999AACCDDDDDr)   r+   c                    t          | j        t                    sJ t          | j                  dk    sJ | j                            d          dk    sJ d S )Nr   r   r   )r    r!   r"   r#   r$   r   s    r'   *test_Element_self_assigns_itself_a_UUID_idr4   =   s\     gj#&&&&&wz??b    :C  A%%%%%%r)   c                     t          d          } |                     t          t          d                     t	          |           dk    sJ d S )N$[1] A Textbook on Crocodile Habitatsr   \[\d{1,2}\]pattern A Textbook on Crocodile Habitats)r   applyr   r   r"   )text_elements    r'    test_text_element_apply_cleanersr=   L   sT    CDDDLw|^DDDEEE| BBBBBBBr)   c                     t          t          d          t          t                    g} t          d          } |j        |   t          |          dk    sJ d S )Nr7   r8   u(   [1] • A Textbook on Crocodile Habitatsr   r:   )r   r   r   r   r;   r"   )cleanersr<   s     r'   )test_text_element_apply_multiple_cleanersr@   S   sb    n===w}?U?UVHJKKKLL!!| BBBBBBBr)   c                     t                      } t          | d          sJ | j        J | j        dk    sJ t          |           dk    sJ d S )Nr   r   )r   hasattrr   r"   r3   s    r'   /test_non_text_elements_are_serializable_to_textrC   Z   s_    jjG7F#####<###<2w<<2r)   c                     dd} t          d          }t          j        t          d          5  |                    |            d d d            d S # 1 swxY w Y   d S )	Nsr"   c                    dS )N   r0   )rE   s    r'   bad_cleanerzFtest_apply_raises_if_func_does_not_produce_string.<locals>.bad_cleanerc   s    qr)   r6   r   z%Cleaner produced a non-string output.match)rE   r"   )r   pytestraises
ValueErrorr;   )rH   r<   s     r'   1test_apply_raises_if_func_does_not_produce_stringrN   b   s        CDDDL	z)P	Q	Q	Q ( (;'''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (s   AAA)coordinatesorientation1orientation2expected_coords)rG      rG   r      r   rW   rT   ))
      )rY   (   )   r[   )r\   rZ   ))rY     )rY     )r\   r^   )r\   r]   rO   r   rP   r
   rQ   rR   c                2   t          dd          }||_        t          dd          }||_        t          | |          }|                    |          }|J t	          ||          D ]\  }}	|t          j        |	          k    sJ  |                    |d           |j        j        J |j        j        j	        J t	          |j        j        j	        |          D ]\  }}	|t          j        |	          k    sJ  |j        j        j
        |k    sJ d S )Nd      i  i  rO   coordinate_systemT)in_place)r	   orientationr   !convert_coordinates_to_new_systemziprK   approxmetadatarO   pointssystem)
rO   rP   rQ   rR   coord1coord2r   
new_coords	new_coordexpecteds
             r'   &test_convert_coordinates_to_new_systemrq   l   sE   F c3''F%FdD))F%F+HHHG::6BBJ!!!":?? 4 4	8FM(3333333--ft-DDD'333'.:::"7#3#?#FXX 4 4	8FM(3333333'.&888888r)   c                     t          d d           } t          dd          }t          j        |_        |                     |          J d S )Nrb   r`   ra   )r   r	   r
   SCREENre   rf   )r   coords     r'   *test_convert_coordinate_to_new_system_noneru      sN    $$???GS#&&E#*E44U;;CCCCCr)   c                     d} t                      }t          | |          }t          | |          }|j        j        |k    sJ d S )NrS   rb   rj   rk   )r   r   r   ri   rO   )rO   rc   r   expected_coordinates_metadatas       r'   0test_element_constructor_coordinates_all_presentry      sa    2K022+ARSSSG$7 % % %! '+HHHHHHHr)   c                     t          j        t                    5 } t          t	                                 d d d            n# 1 swxY w Y   t          | j                  dk    sJ d S )N)rc   NCoordinates points should not exist without coordinates system and vice versa.)rK   rL   rM   r   r   r"   valueexc_infos    r'   2test_element_constructor_coordinates_points_absentr      s    	z	"	" >h":"<"<====> > > > > > > > > > > > > > > 	HN[	\ 	\ 	\ 	\ 	\ 	\s   AA
Ac                     t          j        t                    5 } t          d           d d d            n# 1 swxY w Y   t	          | j                  dk    sJ d S )NrS   rO   r{   )rK   rL   rM   r   r"   r|   r}   s    r'   2test_element_constructor_coordinates_system_absentr      s    	z	"	" >h<====> > > > > > > > > > > > > > > 	HN[	\ 	\ 	\ 	\ 	\ 	\s   7;;c                     d} t                      }t          | |          }ddddd}|                                }||k    sJ t          j        |          |k    sJ d S )NrS   rw   rG   r   layout_heightlayout_widthrj   rk   )r   r   r/   	from_dict)rO   rc   coordinates_metadataexpected_schemacoordinates_metadata_dicts        r'   test_coordinate_metadata_serdesr      s    2K022.kJ[\\\2,	 O !5 < < > >$7777()BCCG[[[[[[[r)   c                     d} t                      }t          d| |          }|                                ddddddid ddd	k    sJ d S )
NrS   awt32t1)r,   rO   rc   rO   rG   r   r   r   )ri   typer   r,   )r   r   r/   )rO   rc   r   s      r'   test_element_to_dictr      s    2K022+  G ??!" !:4	 
 ! !      r)   c                     e Zd ZdZd Zej                            d ej	        d          dz  dz  dg          d)d	            Z
d
 Zej                            d ej	        d          dg          d)d            Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Z d! Z!d" Z"d# Z#d$ Z$d% Z%d& Z&d' Z'd(S )*DescribeElementMetadatazFUnit-test suite for `unstructured.documents.elements.ElementMetadata`.c                    t          j        t          d          5  t          d           d d d            d S # 1 swxY w Y   d S )Nz.got an unexpected keyword argument 'file_name'rI   	memo.docx)	file_name)rK   rL   	TypeErrorr   selfs    r'   Hit_detects_unknown_constructor_args_at_both_development_time_and_runtimez`DescribeElementMetadata.it_detects_unknown_constructor_args_at_both_development_time_and_runtime   s    ]9,\]]] 	3 	3k2222	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3s   :>>	file_pathdocuments/docxmemosmemo-2023-11-10.docxz)documents/docx/memos/memo-2023-11-10.docxpathlib.Path | strc                Z    t          |          }|j        dk    sJ |j        dk    sJ d S )Nfilenamezdocuments/docx/memosr   r   file_directoryr   r   r   metas      r'   Ait_accommodates_either_a_pathlib_Path_or_str_for_its_filename_argzYDescribeElementMetadata.it_accommodates_either_a_pathlib_Path_or_str_for_its_filename_arg   sC     	222"&<<<<<} 6666666r)   c                F    t                      }|j        J |j        J d S )Nr   r   r   s     r'   Iit_leaves_both_filename_and_file_directory_None_when_neither_is_specifiedzaDescribeElementMetadata.it_leaves_both_filename_and_file_directory_None_when_neither_is_specified  s0      "***}$$$$$r)   r   c                R    t          |          }|j        J |j        dk    sJ d S )Nr   r   r   r   s      r'   Oand_it_leaves_file_directory_None_when_not_specified_and_filename_is_not_a_pathzgDescribeElementMetadata.and_it_leaves_file_directory_None_when_not_specified_and_filename_is_not_a_path  s<     	222"***}++++++r)   c                Z    t          d          }|j        dk    sJ |j        dk    sJ d S )Nz#documents/docx/memo-2023-11-11.docxr   r   zmemo-2023-11-11.docxr   r   s     r'   Mand_it_splits_off_directory_path_from_its_filename_arg_when_it_is_a_file_pathzeDescribeElementMetadata.and_it_splits_off_directory_path_from_its_filename_arg_when_it_is_a_file_path  sB    (MNNN"&66666} 6666666r)   c                \    t          dd          }|j        dk    sJ |j        dk    sJ d S )Nztmp/staging/memo.docxr   )r   r   r   r   r   s     r'   Lbut_it_prefers_a_specified_file_directory_when_filename_also_contains_a_pathzdDescribeElementMetadata.but_it_prefers_a_specified_file_directory_when_filename_also_contains_a_path  sD    (?P`aaa"&66666}++++++r)   c                *    t          ddd           d S )N2T*   )category_depthr   text_as_htmlr   r   s    r'   Mit_knows_the_types_of_its_known_members_so_type_checking_support_is_availablezeDescribeElementMetadata.it_knows_the_types_of_its_known_members_so_type_checking_support_is_available$  s,    	
 	
 	
 	
 	
 	
r)   c                V    t          d          }d|j        v sJ |j        dk    sJ d S )Nhttps://google.com)urlr   r   __dict__r   r   s     r'   +it_returns_the_value_of_an_attribute_it_haszCDescribeElementMetadata.it_returns_the_value_of_an_attribute_it_has.  s@    #7888%%%%x///////r)   c                J    t                      }d|j        vsJ |j        J d S )Nr   r   r   s     r'   :and_it_returns_None_for_a_known_attribute_it_does_not_havezRDescribeElementMetadata.and_it_returns_None_for_a_known_attribute_it_does_not_have3  s4      DM))))xr)   c                    t                      }d|j        vsJ t          j        t          d          5  |j         d d d            d S # 1 swxY w Y   d S )Ncoefficientz%object has no attribute 'coefficient'rI   )r   r   rK   rL   AttributeErrorr   r   s     r'   Fbut_it_raises_AttributeError_for_an_unknown_attribute_it_does_not_havez^DescribeElementMetadata.but_it_raises_AttributeError_for_an_unknown_attribute_it_does_not_have8  s      DM1111]>1XYYY 	 		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A

AAc                v    t                      }d|j        vsJ d|_        d|j        v sJ |j        dk    sJ d S )Nr   tmp/r   r   r   r   s     r'   .it_stores_a_non_None_field_value_when_assignedzFDescribeElementMetadata.it_stores_a_non_None_field_value_when_assigned>  sU      t}4444$4=0000"f,,,,,,r)   c                    t          d          }d|j        v sJ |j        dk    sJ d |_        d|j        vsJ |j        J d S )Nr   )r   r   r   r   s     r'   .it_removes_a_field_when_None_is_assigned_to_itzFDescribeElementMetadata.it_removes_a_field_when_None_is_assigned_to_itE  si    f5554=0000"f,,,,"t}4444"*****r)   c                n    t          ddddd          }|                                ddddddk    sJ d S )NrG   r   rT   <table></table>r   )r   r   page_numberr   r   )r   r/   r   s     r'   !it_can_serialize_itself_to_a_dictz9DescribeElementMetadata.it_can_serialize_itself_to_a_dictP  si    !*$
 
 
 ||~~$-'"
 "
 
 
 
 
 
 
r)   c                    t          dt          dt                                d          }|                                ddddddddk    sJ d S )NrG   )rT   rT   rU   rV   rX   rw   rT   )r   rO   r   r   r   )r   r   r   r/   r   s     r'   Gand_it_serializes_a_coordinates_sub_object_to_a_dict_when_it_is_presentz_DescribeElementMetadata.and_it_serializes_a_coordinates_sub_object_to_a_dict_when_it_is_present`  s    +7/11   
 
 
 ||~~!" !:4	  	"
 	"
 	
 	
 	
 	
 	
 	
r)   c                    t          dt          dd          d          }|                                ddddddk    sJ d S )NrG   5https://www.nih.gov/about-nih/who-we-are/nih-director
2023-11-09r   date_createdrT   )r   data_sourcer   )r   r   r/   r   s     r'   Gand_it_serializes_a_data_source_sub_object_to_a_dict_when_it_is_presentz_DescribeElementMetadata.and_it_serializes_a_data_source_sub_object_to_a_dict_when_it_is_presentt  s    *K)   
 
 
 ||~~N ,  "
 "
 
 
 
 
 
 
r)   c                    t          t          d          t          d          g          }t          d|d          }|                                ddddk    sJ d S )NLoremzLorem IpsumrG   rT   )r   orig_elementsr   zeJyFzcsKwjAQheFXKVm7MGkzbXwDocu6EpFcTqTQG3UEtfTdbZa6cTnDd/jPi0CHHgNf2yAOmXCljjqXoErKoIw3hqJRXlPuyphrErtM9GAbLNvNL+t2M56ctvU4o0+AXxPSo2m5g9jIb6VwBE0VBSujp1LJ6EiRLpwiSBf3fyvZcbo/vlqnwVvGbZzbN0KT7Hr5AG/eQyM=)r   r   r   r   r/   )r   elementsr   s      r'   Jand_it_serializes_an_orig_elements_sub_object_to_base64_when_it_is_presentzbDescribeElementMetadata.and_it_serializes_an_orig_elements_sub_object_to_base64_when_it_is_present  s    "E'NND4G4G#HII"
 
 
 ||~~F
 	"
 	"
 	
 	
 	
 	
 	
 	
r)   c                    dddiddid}t          j        |          }|                                }d|v sJ d|d         vsJ d|d         vsJ dS )	zMetadata sub-objects ignore fields they do not explicitly define.

        This is _not_ the case for ElementMetadata itself where an non-known field is welcomed as a
        user-defined ad-hoc metadata field.
        hello	new_fieldworldfoo)r   r   rO   rO   r   N)r   r   r/   )r   element_metadatari   metadata_dicts       r'   Gbut_unlike_in_ElementMetadata_unknown_fields_in_sub_objects_are_ignoredz_DescribeElementMetadata.but_unlike_in_ElementMetadata_unknown_fields_in_sub_objects_are_ignored  s     !W U
 
 #,-=>> ((**m++++-">>>>>-">>>>>>>r)   c                   ddddddddd	d
dgd}t          j        |          }|j        dk    sJ |j        t	          dt                                k    sJ |j        t          dd	
          k    sJ |j        J d|j	        vsJ |j
        dk    sJ t          j        t          d          5  |j         d d d            n# 1 swxY w Y   t          |d          r|j        nd J t!          |j        t$                    sJ |j        dgk    sJ |j                            d           |j        ddgk    sJ |d         dgk    sJ d S )NrG   (\?r   rT   rS   r   r   r   r   r   eng)r   r   rO   r   	languagesrw   r   z.ntMetadata' object has no attribute 'quotient'rI   quotientspar   )r   r   r   rO   r   r   r   r   r   r   r   rK   rL   r   r   rB   r    r   listappend)r   	meta_dictr   s      r'   %it_can_deserialize_itself_from_a_dictz=DescribeElementMetadata.it_can_deserialize_itself_from_a_dict  s%   !" !:4	  O ,   
 
	  (33 "a'''' #63+--$
 $
 $
 
 
 
 
 #5G%$
 $
 $
 
 
 
 
 "***t}4444 4'''' ]>1abbb 	 	MM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ")z!:!:DMMM $.$/////~%((((e$$$~%////%%000000s   3CCCc                `    t                      }d|_        d|j        v sJ |j        dk    sJ d S N   foobar)r   r   r   r   s     r'   /it_allows_an_end_user_to_add_an_arbitrary_fieldzGDescribeElementMetadata.it_allows_an_end_user_to_add_an_arbitrary_field  s@      4=(((({ar)   c                h    t                      }d|_        |                                ddik    sJ d S r   )r   r   r/   r   s     r'   /and_fields_so_added_appear_in_the_metadata_JSONzGDescribeElementMetadata.and_fields_so_added_appear_in_the_metadata_JSON  s7      ||~~(A......r)   c                    t                      }d|_        d|j        v sJ d |_        d|j        vsJ t          j        t
          d          5  |j         d d d            d S # 1 swxY w Y   d S )Nr   r   z2'ElementMetadata' object has no attribute 'foobar'rI   )r   r   r   rK   rL   r   r   s     r'   9and_it_removes_an_end_user_field_when_it_is_assigned_NonezQDescribeElementMetadata.and_it_removes_an_end_user_field_when_it_is_assigned_None  s      4=((((t},,,,]"V
 
 
 	 	 KK	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A##A'*A'c                \   t          dd          }d|_        d|_        t          dd          }d|_        d	|_        |                    |           |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        d	k    sJ |j        J |j        dk    sJ |j        dk    sJ |j        J |j	        J |j        dk    sJ |j        d	k    sJ t          j        t          d
          5  |j         d d d            d S # 1 swxY w Y   d S )NrG   )r   r   r      r   rT   )r   r   gffffff?rZ   z.etadata' object has no attribute 'coefficient'rI   )r   r   stem_lengthr   updater   r   r   r   r   rK   rL   r   r   r   others      r'   *it_can_update_itself_from_another_instancezBDescribeElementMetadata.it_can_update_itself_from_another_instance  s   aQ???v1EEEE "a''''"f,,,,1$$$$4''''}####2%%%%#+++#v---- A%%%%!)))y   ~$$$$ B&&&&]>1abbb 	 		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   D!!D%(D%c                    t                      }t          j        t          d          5  |                    ddi           d d d            d S # 1 swxY w Y   d S )Nz1ate\(\)' must be an instance of 'ElementMetadata'rI   r   z0.56)r   rK   rL   rM   r   r   s     r'   Dbut_it_raises_on_attempt_to_update_from_a_non_ElementMetadata_objectz\DescribeElementMetadata.but_it_raises_on_attempt_to_update_from_a_non_ElementMetadata_object#  s      ]:-abbb 	1 	1KK/000	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1s   AAAc                   t          dt          dt                                t          dd          ddgd	d
d          }|t          dt          dt                                t          dd          ddgd	d
d          k    sJ d S )NrG   rS   rw   r   
2023-11-08r   r   r   rT   r   r   )r   rO   r   r   r   r   r   r   )r   r   r   r   r   s     r'   @it_is_equal_to_another_instance_with_the_same_known_field_valueszXDescribeElementMetadata.it_is_equal_to_another_instance_with_the_same_known_field_values*  s    +7/11   +K)   "g*$
 
 
  +7/11   +K)   "g*$
 
 
 
 
 
 
 
 
r)   c                     G d d          }t                      } |            }|j        |j        k    sJ ||k    sJ d S )Nc                      e Zd ZdS )iDescribeElementMetadata.but_it_is_never_equal_to_a_non_ElementMetadata_object.<locals>.NotElementMetadataN)__name__
__module____qualname__r0   r)   r'   NotElementMetadatar  M  s        Dr)   r  )r   r   )r   r  r   r   s       r'   5but_it_is_never_equal_to_a_non_ElementMetadata_objectzMDescribeElementMetadata.but_it_is_never_equal_to_a_non_ElementMetadata_objectL  sm    	 	 	 	 	 	 	 	   ""$$ }....u}}}}}}r)   c                r    t          d          }d|_        t          d          }d|_        ||k    sJ d S )NrG   r   r   r   r   r   s      r'   Ait_is_equal_to_another_instance_with_the_same_ad_hoc_field_valueszYDescribeElementMetadata.it_is_equal_to_another_instance_with_the_same_ad_hoc_field_valuesX  C    a000q111 u}}}}}}r)   c                r    t          d          }d|_        t          d          }d|_        ||k    sJ d S )NrG   r  r   g
ףp=
?r  r   s      r'   Abut_it_is_not_equal_to_an_instance_with_ad_hoc_fields_that_differzYDescribeElementMetadata.but_it_is_not_equal_to_an_instance_with_ad_hoc_fields_that_differ`  r  r)   c                X    t          dg          }|t          ddg          k    sJ d S )Nr   )r   r   r   r   s     r'   :it_is_not_equal_when_a_list_field_contains_different_itemszRDescribeElementMetadata.it_is_not_equal_when_a_list_field_contains_different_itemsh  s:    %111%@@@@@@@@@r)   c                    t          t          dt                                          }|t          t          dt                                          k    sJ d S )NrS   rw   r   )r   )rT   r   rV   )r   rT   )r   r   r   r   s     r'   Aand_it_is_not_equal_when_the_coordinates_sub_object_field_differszYDescribeElementMetadata.and_it_is_not_equal_when_the_coordinates_sub_object_field_differsl  s    +7/11  
 
 
 +7/11  
 
 
 
 
 
 
 
 
r)   c                    t          t          dd                    }|t          t          dd                    k    sJ d S )Nr   r   r   )r   r   )r   r   r   s     r'   Aand_it_is_not_equal_when_the_data_source_sub_object_field_differszYDescribeElementMetadata.and_it_is_not_equal_when_the_data_source_sub_object_field_differsz  sz    *K)  
 
 
 *K)  
 
 
 
 
 
 
 
 
r)   c                    t                      }t          |j                  }t          j                    }|D ]}||v sJ d| d            d S )NzElementMetadata field `.zm` does not have a consolidation strategy. Add one in `ConsolidationStrategy.field_consolidation_strategies().)r   sorted_known_field_namesr   field_consolidation_strategies)r   ri   metadata_field_namesconsolidation_strategies
field_names        r'   Cit_can_find_the_consolidation_strategy_for_each_of_its_known_fieldsz[DescribeElementMetadata.it_can_find_the_consolidation_strategy_for_each_of_its_known_fields  s}    "$$%h&ABB#8#W#Y#Y . 	 	J!9999X: X X X :999	 	r)   N)r   r   )(r  r  r  __doc__r   rK   markparametrizepathlibPathr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  r  r  r  r  r  r  r0   r)   r'   r   r      ss       PP3 3 3 [GL)**W47MM7	
 7 7 7 7% % % [[<7<+D+Dk*RSS, , , TS,7 7 7, , ,
 
 
0 0 0
     
  - - -+ + +
 
 
 
 
 
(
 
 
$
 
 
&? ? ?241 41 41p     / / /
	 	 	  B1 1 1 
  
  
D
 
 
    A A A
 
 

 
 
 	 	 	 	 	r)   r   c            	        t          dt          d                    } | t          dt          d| j                            t          dt          d| j                            g}t          t	          j        |                    }d |D             }t          |          t          t          |                    k    s
J d            |d         j        j	        |d	         j        j	        k    sJ t          |          D ]s\  }}|j        ||         j        k    s
J d
            |j        j	        B|j        j	        |v s
J d            |j        j	        ||         j        j	        k    s
J d            td S )NParentrG   r   r   ri   r   r   	parent_idc                    g | ]	}|j         
S r0   r!   .0r   s     r'   
<listcomp>zCtest_hash_ids_are_unique_for_duplicate_elements.<locals>.<listcomp>      
6
6
6'7:
6
6
6r)   z Recalculated IDs must be unique.rT   z'IDs haven't changed after recalculationz Parent ID not in the list of IDsz,Parent ID hasn't changed after recalculation)r   r   r!   r   copydeepcopyr#   setri   r)  	enumerate)parentr   updated_elementsidsidxupdated_elements         r'   /test_hash_ids_are_unique_for_duplicate_elementsr9    s   x/a*H*H*HIIIF)o!vy&Y&Y&YZZZ)o!vy&Y&Y&YZZZH /t}X/F/FGG
6
6%5
6
6
6C s88s3s88}}$$$&H$$$A;)Xa[-A-KKKKK )*: ; ; > >_!Xc]%55557`555#-9"+5<<<>`<<<(2hsm6L6VVVV= WVV> >r)   c                    t          dt          d                    } t          dt          d| j                            }| ||g}t          t	          j        |                    }d |D             }t          |          t          t          |                    dz   k    s
J d            |d         j        j	        |d	         j        j	        k    sJ d S )
Nr%  rG   r&  r'  r   r(  c                    g | ]	}|j         
S r0   r+  r,  s     r'   r.  zItest_hash_ids_can_handle_duplicated_element_instances.<locals>.<listcomp>  r/  r)   z8One element is duplicated so uniques should be one less.rT   )
r   r   r!   r   r0  r1  r#   r2  ri   r)  )r4  r   r   r5  r6  s        r'   5test_hash_ids_can_handle_duplicated_element_instancesr<    s    x/a*H*H*HIIIF	OU[U^,_,_,_```G%w8H /t}X/F/FGG
6
6%5
6
6
6C s88s3s88}}q((((*d(((A;)Xa[-A-KKKKKKKr)   c            	     P   t          dt          d                    } | t          dt          d| j                            t          dt          d| j                            g}t          |          }d |D             }d |D             }|g d	k    sJ |g d
k    sJ d S )Nr%  rG   r&  r'  r   r(  c                    g | ]	}|j         
S r0   r+  r,  s     r'   r.  z3test_hash_ids_are_deterministic.<locals>.<listcomp>  r/  r)   c                &    g | ]}|j         j        S r0   )ri   r)  r,  s     r'   r.  z3test_hash_ids_are_deterministic.<locals>.<listcomp>  s    MMM'",MMMr)   ) ea9eb7e80383c190f8cafce1ad666624 4112a8d24886276e18e759d06956021b eba84bbe7f03e8b91a1527323040ee3d)Nr@  r@  )r   r   r!   r   )r4  r   r5  r6  
parent_idss        r'   test_hash_ids_are_deterministicrD    s    x/a*H*H*HIIIF)o!vy&Y&Y&YZZZ)o!vy&Y&Y&YZZZH /x88
6
6%5
6
6
6CMM<LMMMJ       
          r)   )r   sequence_numberr   r   r&   ))r   rG   foo.pdfrG    4bb264eb23ceb44cd8fcc5af44f8dc71)r   rT   rF  rG    75fc1de48cf724ec00aa8d1c5a0d3758)	some textr   some.txtN 1a2627b5760c06b1440102f11a1edb0f)rI  rG   rJ  N e3fd10d867c4a1c0264dde40e3d7e45ar   r"   rE  intr   r   
int | Noner&   c                    t          | t          ||                    }|                    |          |k    s
J d            |j        |k    s
J d            d S )N)r   r   r'  zReturned ID does not matchzID should be set)r   r   r%   r!   )r   rE  r   r   r&   r   s         r'   test_id_to_hash_calculatesrP    sw      (LLL  G o..-???A]???:&&&(:&&&&&r)   c                    t          d          } t          |           }t          j                    }t	          j        d |D             |           |                    d           t          |          }||k    sJ d S )Nz1test_evaluate_files/unstructured_output/form.jsonr   c                6    g | ]}|                                 S r0   )r/   r,  s     r'   r.  z4test_formskeysvalues_reads_saves.<locals>.<listcomp>  s"    888Ww  888r)   r   )file)r   r   ioStringIOr-   dumpseek)r   as_readtmp_file	as_read_2s       r'    test_formskeysvalues_reads_savesr[    s     STTHh///G{}}HI88888(CCCMM!H---Iir)   )r   r   )rO   r   rP   r
   rQ   r
   rR   r   )
r   r"   rE  rM  r   r"   r   rN  r&   r"   )9r  
__future__r   r0  rT  r-   r"  	functoolsr   rK   test_unstructured.unit_utilsr   r   unstructured.cleaners.corer   r   "unstructured.documents.coordinatesr	   r
   r   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   unstructured.partition.jsonr   r   r!  r(   r1   r4   r=   r@   rC   rN   	CARTESIANrs   rq   ru   ry   r   r   r   r   r   r9  r<  rD  rP  r[  r0   r)   r'   <module>rd     s$   ? > " " " " " "  				          J J J J J J J J B B B B B B B B         
                        7 6 6 6 6 6 WWYY"xxzz$JKK' ' ' LK'$E E E
 		""&&&

	 & & & &C C CC C C  ( ( ( F -!!4		
 -!<		
 -!<		
 -4		
' :9 9 9; :92D D DI I I    \ \ \  0f f f f f f f fR> > >4L L L  0 K  
 
; ; ;
 
;         r)   