
    NgU5              
       b   d dl Z d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZmZ d dlmZ d d
lmZ  G d de	j                   Z! G d de	j"                  Z#d Z$d Z%ej&        '                    ddedfdedfdedfdedfg          d             Z(d Z)d Z*d Z+d Z,ej&        '                    d ej-                              de.de/e         fd             Z0ej&        '                    d!ej1        d"fej2        d#fej3        d"fej4        d#fej5        d#fej6        d"fg          de.d$e7fd%            Z8d& Z9d' Z: G d( d)          Z;d* Z<d+ Z=d, Z>ej&        '                    d-d.d/g          d0             Z?d1 Z@ ed2          fd3ZAdS )4    N)Pool)Image)layout)
TextRegionLayoutElement)example_doc_path)
PixelSpace)
TYPE_TO_TEXT_ELEMENT_MAPCheckBoxCoordinatesMetadataElementTypeFigureCaptionHeaderListItemNarrativeTextTextTitle)commonc                   >    e Zd Zdedej        fdZed             ZdS )MockPageLayoutnumberimagec                 "    || _         || _        d S Nr   r   )selfr   r   s      j/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/partition/common/test_common.py__init__zMockPageLayout.__init__    s    


    c                     t          ddd           t          ddd           t          ddd           t          dd	d           gS )
NHeadlinez#Charlie Brown and the Great Pumpkin)typetextbboxSubheadlinezThe Beginningr   z/This time Charlie Brown had it really tricky...r   z#Another book title in the same pager   r   s    r   elementszMockPageLayout.elements$   s     :  
 "$  
 F  
 :  !
 	
r    N)__name__
__module____qualname__intr   r   propertyr(    r    r   r   r      sS        s 5;     
 
 X
 
 
r    r   c                   $    e Zd Zed             ZdS )MockDocumentLayoutc                 L    t          dt          j        dd                    gS )N   1)r2   r2   r   )r   r   newr'   s    r   pageszMockDocumentLayout.pages?   s*     !59S&+A+ABBB
 	
r    N)r)   r*   r+   r-   r5   r.   r    r   r0   r0   >   s-        
 
 X
 
 
r    r0   c            	          dddgddgddgdd	ggd d
d} t          dd          }t          j        | |          }|t          d
ddgddgddgdd	gg|          k    sJ d S )Nr   r2                        Some lovely text)r#   coordinatescoordinate_systemr$   
      widthheightr@   r$   r?   r@   )r
   r   normalize_layout_elementr   layout_elementr@   elements      r   "test_normalize_layout_element_dictrL   F   s    AAAA7!"	 N #B777-+  G eVaVaVaV4+        r    c                      dddd} t          dd          }t          j        | |          }|t          dd|	          k    sJ d S )
NFigure)r2   r7   r8   r9   )r:   r;   )r<   r=   r>   r#   r?   r$   rA   rB   rC   rF   rG   )r
   r   rH   ImageElementrI   s      r   *test_normalize_layout_element_dict_captionrS   Y   s    7" N
 #B777-+  G l4+        r    )element_typeexpected_typeexpected_depthr   r"   r2   r&   r7   r   c                     | ddgddgddgddggd	d
}t          dd          }t          j        ||          }|j        j        |k    sJ t          ||          sJ d S )Nr2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   )r
   r   rH   metadatacategory_depth
isinstance)rT   rU   rV   rJ   r@   rK   s         r   &test_normalize_layout_element_headliner[   k   s     AAAA7" N
 #B777-nPabbbG*n<<<<g}-------r    c            	          dddgddgddgdd	ggd
d} t          dd          }t          j        | |          }|t          d
ddgddgddgdd	gg|          k    sJ d S )Nr   r2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   rG   )r
   r   rH   r   rI   s      r   1test_normalize_layout_element_dict_figure_captionr]      s    AAAA7" N
 #B777-+  G mVaVaVaV4+        r    c            	          dddgddgddgdd	ggd
d} t          dd          }t          j        | |          }|t          d
ddgddgddgdd	gg|          k    sJ d S )NMiscr2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   rG   )r
   r   rH   r   rI   s      r   'test_normalize_layout_element_dict_miscr`      s    AAAA7" N
 #B777-+  G dVaVaVaV4+        r    c                      t          j        dddddd          } t          dd	
          }t          j        | |          }|t          dd|          k    sJ d S )Nr   r2   r7   r8   r9   r>   r#   x1y1x2y2r$   rA   rB   rC   rF   rO   )r2   r9   rP   )r8   r7   rG   r   from_coordsr
   r   rH   r   rI   s      r   ,test_normalize_layout_element_layout_elementrj      s    ".  N #B777-+  G m4+        r    c                      t          j        dddddd          } t          dd	
          }t          j        | |          }|t          dd|          k    sJ d S )Nr   r2   r7   r8   r9   r>   rb   rA   rB   rC   rF   rg   rG   rh   rI   s      r   ;test_normalize_layout_element_layout_element_narrative_textrl      s    ".  N #B777-+  G m4+        r    )rT   expected_element_classrT   rm   c                     t          j        | ddddd          }t          dd	          }t          j        ||
          }| |dd|          k    sJ d S )Nr2   r7   r8   r9   r>   rb   rA   rB   rC   rF   rg   rG   )r   ri   r
   r   rH   )rT   rm   rJ   r@   rK   s        r   Mtest_normalize_layout_element_layout_element_maps_to_appropriate_text_elementro      s     #.  N #B777-+  G ,,4+        r    )rT   expected_checkedFTrp   c                     t          j        | ddddd          }t          dd	          }t          j        ||
          }t          |t                    sJ |t          |d|          k    sJ d S )Nr2   r7   r8   r9    rb   rA   rB   rC   rF   rg   )checkedr?   r@   )r   ri   r
   r   rH   rZ   r   )rT   rp   rJ   r@   rK   s        r   'test_normalize_layout_element_checkablert      s     #.  N #B777-+  G gx(((((h 4+        r    c                      t          j        dddddd          } t          dd	
          }t          j        | |          }|t          dd|          t          dd|          t          dd|          gk    sJ d S )NListr2   r7   r8   r9   z61. I'm so cool! 2. You're cool too. 3. We're all cool!rb   rA   rB   rC   rF   I'm so cool!rg   rG   You're cool too.We're all cool!r   ri   r
   r   rH   r   rJ   r@   r(   s      r   -test_normalize_layout_element_enumerated_listr|     s    ".E  N #B777.+  H 8/	
 	
 	

 	#8/	
 	
 	

 	"8/	
 	
 	
      r    c                      t          j        dddddd          } t          dd	
          }t          j        | |          }|t          dd|          t          dd|          t          dd|          gk    sJ d S )Nrv   r2   r7   r8   r9   z3* I'm so cool! * You're cool too. * We're all cool!rb   rA   rB   rC   rF   rw   rg   rG   rx   ry   rz   r{   s      r   +test_normalize_layout_element_bulleted_listr~   ,  s    ".B  N #B777.+  H 8/	
 	
 	

 	#8/	
 	
 	

 	"8/	
 	
 	
      r    c                       e Zd Zd ZdS )MockRunOutputc                 0    || _         || _        || _        d S r   )
returncodestdoutstderr)r   r   r   r   s       r   r   zMockRunOutput.__init__O  s    $r    N)r)   r*   r+   r   r.   r    r   r   r   M  s#            r    r   c                     ddl m} d }|                     |d|           t          j        ddd           d	|j        v sJ d S )
Nr   )
subprocessc                  l    t          dd                                d                                          S )Nr2   zan error occurredzerror details)r   encode)argskwargss     r   mock_runz9test_convert_office_doc_captures_errors.<locals>.mock_runX  s-    Q 3 : : < <o>T>T>V>VWWWr    runzno-real.docxzfake-directorydocx)target_formatz4soffice failed to convert to format docx with code 1)$unstructured.partition.common.commonr   setattrr   convert_office_docr$   )monkeypatchcaplogr   r   s       r   'test_convert_office_doc_captures_errorsr   U  sr    ??????X X X 
E8444
n.>fUUUUAV[PPPPPPr    c                     d dD             } | D ]1}|                     d           |dz                      d           2t          d          t          d          5 }|                    t
          j        fd	| D                        d d d            n# 1 swxY w Y   t          j        d
 | D                       dk    sJ d S )Nc                 6    g | ]}t          j        |          S r.   pathlibPath.0paths     r   
<listcomp>zNtest_convert_office_docs_avoids_concurrent_call_to_soffice.<locals>.<listcomp>a  s"    ___DW\$''___r    )z
/tmp/proc1z
/tmp/proc2z
/tmp/proc3T)exist_oksimple.docx
missing_ok
simple.docr8   c                     g | ]}|fS r.   r.   r   r   file_to_converts     r   r   zNtest_convert_office_docs_avoids_concurrent_call_to_soffice.<locals>.<listcomp>h  s    0c0c0cT/41H0c0c0cr    c                 <    g | ]}|d z                                   S r   is_filer   s     r   r   zNtest_convert_office_docs_avoids_concurrent_call_to_soffice.<locals>.<listcomp>j  )    NNND=(1133NNNr    	mkdirunlinkr	   r   starmapr   r   npsumpaths_to_saver   poolr   s      @r   :test_convert_office_docs_avoids_concurrent_call_to_sofficer   `  s4   __4^___M 7 7

D
!!!		%%%6666&|44O	a eDV.0c0c0c0cUb0c0c0cddde e e e e e e e e e e e e e e 6NNNNNOOSTTTTTTTs    -BB Bc                     d dD             } | D ]2}|                     dd           |dz                      d           3t          d          t          d          5 }|                    t
          j        fd	| D                        d d d            n# 1 swxY w Y   t          j        d
 | D                       dk     sJ d S )Nc                 6    g | ]}t          j        |          S r.   r   r   s     r   r   zBtest_convert_office_docs_respects_wait_timeout.<locals>.<listcomp>n  s/       #T  r    )z/tmp/wait/proc1z/tmp/wait/proc2z/tmp/wait/proc3T)parentsr   r   r   r   r8   c                     g | ]	}|d ddf
S )r   Nr   r.   r   s     r   r   zBtest_convert_office_docs_respects_wait_timeout.<locals>.<listcomp>{  s$    PPP$otVT15PPPr    c                 <    g | ]}|d z                                   S r   r   r   s     r   r   zBtest_convert_office_docs_respects_wait_timeout.<locals>.<listcomp>  r   r    r   r   s      @r   .test_convert_office_docs_respects_wait_timeoutr   m  s;    '`  M  7 7

4$
///		%%%6666&|44O	a 
D% QPPP-PPP		
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 6NNNNNOORSSSSSSSs   !-BB!Br$   expected)u@   <table><tbody><tr><td>👨\U+1F3FB🔧</td></tr></tbody></table>T)z6<table><tbody><tr><td>Hello!</td></tr></tbody></table>Fc                 6    t          j        |           |u sJ d S r   )r   contains_emojir   s     r   test_contains_emojir     s&      &&(222222r    c                      t                      } t          j        | j        d                   }t	          |t
                    sJ d S )Nr   )r0   r   get_page_image_metadatar5   rZ   dict)docrX   s     r   2test_get_page_image_metadata_and_coordinate_systemr     s?    


C-cil;;Hh%%%%%%%r    z img/layout-parser-paper-fast.jpgc                    t          j        ddddd          t          j        ddd	d
d          g}d |D             }t          j        |           }t	          j        ||j                  }t          |          t          |          k    sJ d |D             t          j	        hk    sJ |j        \  }}t          ||          }t          ||          D ]0\  }}	|j        j        t          |	j        j        |          k    sJ 1d S )Ng     `d@g     \@g     @|@g      `@z(LayoutParser: A Unified Toolkit for Deep)r$   g     c@g     `@g     |@g     `b@z&Learning Based Document Image Analysisc                 f    g | ].}t          |j        |j        |j        t          j                   /S ))r%   r$   sourcer#   )r   r%   r$   r   r   UNCATEGORIZED_TEXT)r   rs     r   r   z-test_ocr_data_to_elements.<locals>.<listcomp>  sN         	8/		
 	
 	
  r    )ocr_data
image_sizec                     h | ]	}|j         
S r.   )category)r   els     r   	<setcomp>z,test_ocr_data_to_elements.<locals>.<setcomp>  s    +++BBK+++r    rC   )pointssystem)r   ri   r   openr   ocr_data_to_elementssizelenr   r   r
   ziprX   r?   r   r%   )
filenametext_regionsr   r   r(   image_widthimage_heightr@   r   	layout_els
             r   test_ocr_data_to_elementsr     so    	;	
 	
 	
 	9	
 	
 	
L     H Jx  E*:  H
 x==CMM))))++(+++0N/OOOOO !&
K"\JJJXx00 
 
I{&*=>-$+
 +
 +
 
 
 
 
 

 
r    )Br   multiprocessingr   numpyr   pytestPILr    unstructured_inference.inferencer   )unstructured_inference.inference.elementsr   .unstructured_inference.inference.layoutelementr   test_unstructured.unit_utilsr	   "unstructured.documents.coordinatesr
   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   rR   unstructured.partition.commonr   
PageLayoutr   DocumentLayoutr0   rL   rS   markparametrizer[   r]   r`   rj   rl   itemsstrr#   ro   CHECK_BOX_UNCHECKEDCHECK_BOX_CHECKEDRADIO_BUTTON_UNCHECKEDRADIO_BUTTON_CHECKEDCHECKED	UNCHECKEDboolrt   r|   r~   r   r   r   r   r   r   r   r.   r    r   <module>r      sB                           3 3 3 3 3 3 @ @ @ @ @ @ H H H H H H 9 9 9 9 9 9 9 9 9 9 9 9                             1 0 0 0 0 0
 
 
 
 
V& 
 
 
>
 
 
 
 
. 
 
 
  &  $ 7	%	UA	q!	64 	 	. 	. 	.  $  $  *  * .""$$  J  	 0 (		(%0		&-		+U3		)40		d#		&
 
# QU   
 
,  B  B       Q Q Q
U 
U 
UT T T, SI 3 3 3& & & @AA-
 -
 -
 -
 -
 -
r    