
    Ng*                    r   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZ  ej                    d             Z ej                    d             Z ej                    d             Zd	 Zd
 Zd Zd Zd Zd Zd Zej                            dd dgd ed          d gd ed          g          d             Zej                            dg dg          d             Zej                            dd gd ed          g          d             Zej                            dd dgd ed          g          d             Zej                            dg dg          d             Zej                            dddddd d!d"gd!d#d$d d%d&d&d'd(d)fdddddd*d+gdd,d-d.d%d&d&d'd(d)fdddddd*d+gdd/d-d d%d&d&d'd(d)fdddd0dd+d*gdd1d-d d%d&d&d'd(d)fddd0ddd*d+gdd2d-d d%d&d&d'd(d)fdddd3dd*d+gdd4d-d d%d&d&d'd(d)fd5d6dddd*d+gdd7d8d d9d:d;d<d(d)fg          d=             Z d> Z!d? Z"d@ Z#dA Z$dS )B    )annotationsN)utils)
PixelSpace)ElementMetadataNarrativeTextTitlec                     ddidddidgS )NtextzThis is a sentence.zThis is another sentence.scoreg?)r
   meta r       X/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/test_utils.py
input_datar      s&     
&',wnEE r   c                B    t           j                            | d          S )Nzoutput.jsonl)ospathjoin)tmp_paths    r   output_jsonl_filer      s    7<<.111r   c                    t           j                            | d          }t          |d          5 }|                    d |D                        d d d            n# 1 swxY w Y   |S )Nzinput.jsonlzw+c                <    g | ]}t          j        |          d z   S )
)jsondumps).0objs     r   
<listcomp>z$input_jsonl_file.<locals>.<listcomp>   s%    LLL#tz#5LLLr   )r   r   r   open
writelines)r   r   	file_path
input_files       r   input_jsonl_filer#      s    X}55I	i		 N*LLLLLMMMN N N N N N N N N N N N N N Ns    AA!$A!c                    t          j        | |           t          |          5 }d |D             }d d d            n# 1 swxY w Y   || k    sJ d S )Nc                6    g | ]}t          j        |          S r   )r   loads)r   lines     r   r   z&test_save_as_jsonl.<locals>.<listcomp>%   s"    >>>$TZ%%>>>r   )r   save_as_jsonlr   )r   r   output_file	file_datas       r   test_save_as_jsonlr+   "   s    	
$5666		 	  ?K>>+>>>	? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
""""""s   >AAc                >    t          j        |           }||k    sJ d S N)r   read_from_jsonl)r#   r   r*   s      r   test_read_as_jsonlr/   )   s*    %&677I
""""""r   c                 V    t          j        d          d             }  |              d S )Nnumpydependenciesc                     dd l } d S Nr   r1   r6   s    r   	test_funcz7test_requires_dependencies_decorator.<locals>.test_func/   s    r   r   requires_dependenciesr7   s    r   $test_requires_dependencies_decoratorr;   .   s;    
 g666  76 IKKKKKr   c                 Z    t          j        ddg          d             }  |              d S )Nr1   pandasr2   c                     dd l } dd l}d S r5   r1   r=   r?   s     r   r7   z@test_requires_dependencies_decorator_multiple.<locals>.test_func7   s    r   r8   r:   s    r   -test_requires_dependencies_decorator_multipler@   6   sA    
 w.ABBB  CB IKKKKKr   c                     t          j        d          d             } t          j        t                    5   |              d d d            d S # 1 swxY w Y   d S )Nnot_a_packager2   c                     dd l } d S r5   rB   rD   s    r   r7   zDtest_requires_dependencies_decorator_import_error.<locals>.test_func@   s    r   r   r9   pytestraisesImportErrorr:   s    r   1test_requires_dependencies_decorator_import_errorrI   ?   s    
 o>>>  ?> 
{	#	#  	                 s   AAAc                     t          j        ddg          d             } t          j        t                    5   |              d d d            d S # 1 swxY w Y   d S )NrB   r1   r2   c                     dd l } dd l}d S r5   rB   r1   rL   s     r   r7   zMtest_requires_dependencies_decorator_import_error_multiple.<locals>.test_funcI   s    r   rE   r:   s    r   :test_requires_dependencies_decorator_import_error_multiplerM   H   s    
 .HIII  JI 
{	#	#  	                 s   AAAc                 j    t          j        d           G d d                      }  |              d S )Nr1   r2   c                      e Zd Zd ZdS )@test_requires_dependencies_decorator_in_class.<locals>.TestClassc                    dd l }d S r5   r6   )selfr1   s     r   __init__zItest_requires_dependencies_decorator_in_class.<locals>.TestClass.__init__U   s    LLLLLr   N)__name__
__module____qualname__rS   r   r   r   	TestClassrP   S   s#        	 	 	 	 	r   rW   r8   )rW   s    r   -test_requires_dependencies_decorator_in_classrX   R   sT    
 g666       76 IKKKKKr   iterator   )r   rZ   
   )r   c                :    t          j        |           dk    sJ d S r5   r   firstrY   s    r   test_first_gives_firstr`   [   #    ;x  A%%%%%%r   r   c                    t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S r-   )rF   rG   
ValueErrorr   r^   r_   s    r   test_first_raises_if_emptyrd   `   s    	z	"	"  H                    <A A c                :    t          j        |           dk    sJ d S r5   r]   r_   s    r   test_only_gives_onlyrg   f   ra   r   c                    t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S r-   rF   rG   rc   r   onlyr_   s    r   %test_only_raises_when_len_more_than_1rk   k       	z	"	"  
8                 re   c                    t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S r-   ri   r_   s    r   test_only_raises_if_emptyrn   q   rl   re   )coords1coords2text1text2nested_error_tolerance_pxexpectation))      )ru      )   rw   )rx   rv   ))      )ry      rv   r{   rv   rz   Some lovely titleSome lovely textrv   zTitle(ix=0)zNarrativeText(ix=1)znested NarrativeText in Titlez100%z5.88%u   9pxˆ2u   18pxˆ2)largest_ngram_percentageoverlap_percentage_totalmax_areamin_area
total_area)overlapping_elementsparent_elementoverlapping_caseoverlap_percentagemetadataz0. Title(ix=0)z1. NarrativeText(ix=1)zFpartial overlap sharing 50.0% of the text from1. NarrativeText(2-gram)z11.11%g      I@z#partial overlap with duplicate text z6partial overlap with empty content in 1. NarrativeTextz.partial overlap with empty content in 0. Titlez Something totally different herez$partial overlap without sharing text)r|   )rv   r[   )rw   r[   )rw   r{   ))rZ   rz   )ry   rx   )r{   rx   r}   zSmall partial overlapz8.33%z3.23%u   20pxˆ2u   12pxˆ2u   32pxˆ2c           	     "   t          || t          dd          t          d                    t          ||t          dd          t          d                    g}t	          j        ||d          \  }}|du sJ |d	         |k    sJ d S )
N   widthheightrZ   page_numberr
   coordinatescoordinate_systemr         $@sm_overlap_thresholdTr   r   r   r   r   r   #catch_overlapping_and_nested_bboxes)	ro   rp   rq   rr   rs   rt   elementsoverlapping_flagoverlapping_casess	            r   (test_catch_overlapping_and_nested_bboxesr   w   s    l 	(r"===$333		
 	
 	
 	(r"===$333		
 	
 	
H +0*S!!+ + +''
 t####Q;......r   c            	        t          ddt          dd          t          d                    t          dd	t          dd          t          d                    g} t	          j        | dd
          \  }}|du sJ |g k    sJ d S )Nr~   ))ru   r{   )ru   rx   )rx   rx   )rx   r{   r   r   rZ   r   r   r   ))r{   rw   )r{   	   )r   r   )r   rw   r   r   Fr   )r   r   r   s      r   =test_catch_overlapping_and_nested_bboxes_non_overlapping_caser   #  s    $8(r"===$333		
 	
 	
 	#8(r"===$333		
 	
 	
H +0*S	!+ + +''
 u$$$$""""""r   c                 D    dg} t          j        |           }|dk    sJ d S )N*   )r   rj   )singleton_iterableresults     r   $test_only_returns_singleton_iterabler   ;  s+    Z*++FR<<<<<<r   c                     ddg} t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S )Nr   r   ri   )r   s    r   *test_only_raises_on_non_singleton_iterabler   A  s    a	z	"	" ' '
%&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 's   A  AAc                 v    d} d}d}t          j        | ||          \  }}|dk    sJ t          |          rJ d S )Nr   zbanana orange pineapplery   r   )r   !calculate_shared_ngram_percentagebool)str1str2npercentcommon_ngramss        r   Ftest_calculate_shared_ngram_percentage_returns_null_vals_for_empty_strr   G  sR    D$D	A"DT4QRSSG]a<<<<M"""""""r   )%
__future__r   r   r   rF   unstructuredr   "unstructured.documents.coordinatesr   unstructured.documents.elementsr   r   r   fixturer   r   r#   r+   r/   r;   r@   rI   rM   rX   markparametrizeranger`   rd   rg   rk   rn   r   r   r   r   r   r   r   r   <module>r      s   " " " " " "  				        9 9 9 9 9 9 Q Q Q Q Q Q Q Q Q Q    2 2 2   # # ## # #
           q!ffeeBii!dEERSHH%UVV& & WV& b"X..  /.
 qc4q%:;;& & <;& q!ffeeBii%@AA  BA
 b"X..  /.
 X -,)68M(N"/$C&,0107 ( ("+  	
* -,)9;S(T"&%(&.0407 ( ("+  	
, -,)9;S(T"&$I&.0107 ( ("+  	
* -,)ACS(T"&%]&.0107 ( ("+  	
* -,)9;S(T"&$T&.0107 ( ("+  	
* -,.)9;S(T"&$J&.0107 ( ("+  	
* /,)9;S(T"&$;&-0107 ) )"+  	
uNQ Qd/ /eQ Qd/4# # #0  ' ' '# # # # #r   