
    NgN                    n   d dl mZ d dlmZmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZmZmZmZmZm Z  dZ!e G d de                      Z"e G d de                      Z#ej$        ej%        fd6dZ&d7dZ'	 	 	 d8d9d'Z(d:d+Z)	 d;d<d1Z*d;d=d3Z+	 d;d>d5Z,dS )?    )annotations)	dataclassfield)Any
CollectionIterableListOptionalN)	TextBlock)	DataFrame)connected_components)inference_config)FULL_PAGE_REGION_THRESHOLDElementTypeSource)ImageTextRegion	Rectangle
TextRegionTextRegionscoords_intersectionsgrow_region_to_match_region)region_bounding_boxes_are_almost_the_samegHz>c                      e Zd ZU  ed           Zded<    ed           Zded<    ee          Zded<   d	 Z	ddZ
ddZedd            Zd Zedd            ZdS )LayoutElementsc                 *    t          j        g           S Nnparray     j/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured_inference/inference/layoutelement.py<lambda>zLayoutElements.<lambda>    s    bhrll r!   )default_factory
np.ndarrayelement_probsc                 *    t          j        g           S r   r   r    r!   r"   r#   zLayoutElements.<lambda>!   s    "(2,, r!   element_class_idszdict[int, str]element_class_id_mapc           	         | j         j        d         }dD ]D}t          | |          j        dk    r)|r't	          | |t          j        d g|z                       E| j                            t                    | _        d S )Nr   )r&   r(   texts)
element_coordsshapegetattrsizesetattrr   r   r&   astypefloat)selfelement_sizeattrs      r"   __post_init__zLayoutElements.__post_init__$   s    *03C 	E 	EDtT""'1,,,dBHdVl-B$C$CDDD!/66u==r!   otherobjectreturnboolc                    t          t                    st          S t          j         j                   }t          j        j                   }t          j         j        j                  ot          j         j        j                  owt          j        ||          obt          j         j        |         j        |                   o7 fd j	        D             fdj	        D             k    o j
        j
        k    S )Nc                *    g | ]}j         |         S r    r)   ).0idxr3   s     r"   
<listcomp>z)LayoutElements.__eq__.<locals>.<listcomp>8   s!    RRRC*3/RRRr!   c                *    g | ]}j         |         S r    r=   )r>   r?   r7   s     r"   r@   z)LayoutElements.__eq__.<locals>.<listcomp>9   s!    WWWE.s3WWWr!   )
isinstancer   NotImplementedr   isnanr&   array_equalr,   r+   r(   source)r3   r7   mask
other_masks   ``  r"   __eq__zLayoutElements.__eq__,   s   %00 	"!!+,,,hu2333
N4.0DEE ,tz5;77,tZ00, t1$79LT9RSS,
 SRRR4;QRRRWWWWu?VWWWX, u|+
	
r!   c                    t          | j        |         | j        |         | j        | j        |         | j        |         | j                  S )z&slice and return only selected indices)r,   r+   rF   r&   r(   r)   )r   r,   r+   rF   r&   r(   r)   )r3   indicess     r"   slicezLayoutElements.slice>   sO    .w7*W%;,W5"4W=!%!:
 
 
 	
r!   groupsIterable[LayoutElements]c           	     D   g g g g g f\  }}}}}i }|D ]}|                     |j                   |                     |j                   |                     |j                   |                     |j                   |j        r|                     |j                   |j        r|                    |j                    | t          j	        |          t          j	        |          t          j	        |          t          j	        |          ||r|d         nd          S )zGconcatenate a sequence of LayoutElements in order as one LayoutElementsr   Nr,   r+   r&   r(   r)   rF   )
appendr,   r+   r&   r(   rF   r)   updater   concatenate)	clsrM   coordsr+   probs	class_idssourcesclass_id_mapgroups	            r"   rS   zLayoutElements.concatenateI   s,    46r2r23E0ui 	@ 	@EMM%.///LL%%%LL,---U4555| -u|,,,) @##E$>???s>&11.''.// nY77!-!(271::d
 
 
 	
r!   c                h      fdt           j         j         j         j                  D             S )z9return a list of LayoutElement for backward compatibilityc                    g | ]_\  \  }}}}}}}t                               ||||||j        rj        |         nd t          j        |          rd n|j                  `S )N)texttypeprobrF   )LayoutElementfrom_coordsr)   r   rD   rF   )	r>   x1y1x2y2r]   r_   class_idr3   s	           r"   r@   z*LayoutElements.as_list.<locals>.<listcomp>b   s     
 
 
 7 RR$h %%  +0I+ -h77Xd^^5TT{ &  
 
 
r!   )zipr,   r+   r&   r(   )r3   s   `r"   as_listzLayoutElements.as_list`   sQ    
 
 
 
 ;>#
"&	; ;
 
 
 	
r!   elementslistc                   t          |          }t          j        |dft                    }g }g }t          j        |fd          }t	          |          D ]v\  }}|j        j        |j        j        |j        j        |j        j	        g||<   |
                    |j                   |
                    |j                   |j        pd||<   wt          j        |d          \  }	}
d|	|	dk    <    | |t          j        |          t          j        |          |
t!          t#          t%          t          |	                    |	                    |r|d         j        nd	          S )
zicreate LayoutElements from a list of LayoutElement objects; the objects must have the
        same source   dtyper8   NoneT)return_inverseNr   rP   )lenr   emptyr2   	enumeratebboxrb   rc   rd   re   rQ   r]   r_   r^   uniquer   dictrg   rangerF   )rT   ri   len_elerU   r+   class_probsclass_typesielement
unique_idsrW   s              r"   	from_listzLayoutElements.from_listy   sX    h--7A,e444 hz:::#H-- 	4 	4JAw ',/7<?GLO\F1ILL&&&w|,,,$\3VKNN "	+d K K K
I+/
:'(s!(5//(;//'!%c%J*@*@*&M&M!N!N)0:8A;%%d
 
 
 	
r!   N)r7   r8   r9   r:   )r9   r   )rM   rN   r9   r   )ri   rj   )__name__
__module____qualname__r   r&   __annotations__r(   rv   r)   r6   rI   rL   classmethodrS   rh   r~   r    r!   r"   r   r      s          %6J6J K K KMKKKK$)E:N:N$O$O$OOOOO+05+F+F+FFFFF> > >
 
 
 
$	
 	
 	
 	
 
 
 
 [
,
 
 
2 
 
 
 [
 
 
r!   r   c                      e Zd ZU dZded<   dZded<   dZded<   dZded<   ddZe	dd            Z
e	dd            ZdS )r`   NzOptional[str]r^   zOptional[float]r_   
image_pathzOptional[LayoutElement]parentr9   rv   c                d    | j         dn| j         j        | j        | j        | j        | j        d}|S )z/Converts the class instance to dictionary form.N)coordinatesr]   r^   r_   rF   )rt   r   r]   r^   r_   rF   )r3   out_dicts     r"   to_dictzLayoutElement.to_dict   s>     $(9#444$):OIIIk
 
 r!   regionr   c                    t          |d          r|j        nd}t          |d          r|j        nd}t          |d          r|j        nd}t          |d          r|j        nd} | |||||j                  S )z%Create LayoutElement from superclass.r]   Nr^   r_   rF   )r]   rF   r^   r_   rt   )hasattrr]   r^   r_   rF   rt   )rT   r   r]   r^   r_   rF   s         r"   from_regionzLayoutElement.from_region   s     &ff55?v{{4%ff55?v{{4%ff55?v{{4")&(";";EsV$TTTTTr!   	textblockr   c           
         |j         \  }}}}|j        }|j        }|j        }|                     |||||t
          j        ||          S )z8Create LayoutElement from layoutparser TextBlock object.)r]   rF   r^   r_   )r   r]   r^   scorera   r   DETECTRON2_LP)	rT   r   rb   rc   rd   re   r]   r^   r_   s	            r"   from_lp_textblockzLayoutElement.from_lp_textblock   s_     #.BB~~'  	
 	
 		
r!   )r9   rv   )r   r   )r   r   )r   r   r   r^   r   r_   r   r   r   r   r   r   r    r!   r"   r`   r`      s         D D     $J$$$$&*F****	 	 	 	 U U U [U 
 
 
 [
 
 
r!   r`   inferred_layoutCollection[LayoutElement]extracted_layoutCollection[TextRegion]page_image_sizetuplesame_region_thresholdr2   subregion_thresholdr9   List[LayoutElement]c                   g }g |\  }}t          dd||          }|D ]}	t          |	t                    }
|
rt          |	j        |t
                    }|r8d}| D ]P}|j                            |	j                  r-t          |j        |	j        |          }|j                            |	j        |          }|j        t          j
        t          j        t          j        t          j        fv}|	j                            |j        |          }|p|}|rA|
r                    |           t          |j        |	j                   |	j        |_        d}|r&|r$|
rd}t          |j        |	j                   d}$|r*|j        t          j        k    r                    |           R|s|                    |	           d |D             }fd| D             }||z   }|S )z-Merge two layouts to produce a single layout.r   F)r   Tc           	         g | ]O}t          |j        t          |t                    rt          j        nt          j        |j        |j                   PS ))r]   r^   rF   rt   )	r`   r]   rB   r   r   IMAGEUNCATEGORIZED_TEXTrF   rt   )r>   els     r"   r@   z?merge_inferred_layout_with_extracted_layout.<locals>.<listcomp>  sj     - - -  	 b/224!! 39		
 		
 		
- - -r!   c                    g | ]}|v|	S r    r    )r>   r   inferred_regions_to_removes     r"   r@   z?merge_inferred_layout_with_extracted_layout.<locals>.<listcomp>"  s+       >X0X0X0X0X0Xr!   )r   rB   r   r   rt   r   
intersectsis_almost_subregion_ofr^   r   FIGUREr   
PAGE_BREAKTABLErQ   r   r]   )r   r   r   r   r   extracted_elements_to_addwhfull_page_regionextracted_regionextracted_is_imageis_full_page_imageregion_matchedinferred_region	same_bbox"inferred_is_subregion_of_extractedinferred_is_text"extracted_is_subregion_of_inferred#either_region_is_subregion_of_other%categorized_extracted_elements_to_addinferred_regions_to_addfinal_layoutr   s                         @r"   +merge_inferred_layout_with_extracted_layoutr      s    35!#DAq Aq!,,, A? A?'(8/JJ 	 "K % *" " " . 0	G 0	GO#../?/DEE .GE#($)) 	
 6E5I5`5`$)(; 6a 6 62 $3#7&%*%	@ $  6F5J5a5a#((; 6b 6 62
 7\:\ 4  G) .299/JJJJ 4O4HJZJ_```/?/D,)-7 G<L G) .). 4O4HJZJ_```)-7G',0AAA /55oFFF 	?%,,-=>>>- - ,- - -)   ,   9;RRLr!   region_ar   region_bc                    dd}|                      |          sdS | j        |j        k    r || |           dS  |||            dS )z9Reduce leftmost rectangle to don't overlap with the otherkeepr   reducec                   |j         | j         k    r|j        | j        k     r||j        | j        k    r0|j         | j         k    r | j        dz  |_        | j         dz  |_        d S |j        | j        k     r|j        | j         k     r| j         |_        d S | j         |_        d S |j        | j        k    r|j        | j        k     r| j        |_         d S |j        | j        k     r|j        | j        k     r| j        |_         d S | j        |_         d S )Ng)\(?)re   rb   rd   rc   r   r   s     r"   r   zseparate.<locals>.reduce.  s     9tw69tw#6#6y47""vy47':': GdN	 GdN	y47""vy47':': G	FIII y47""vy47':': G	y47""vy47':': G	FIIIr!   Nr   )r   r   r   r   )r   area)r   r   r   s      r"   separater   +  sx           8 x(( 3=8=((F222222F222222r!      cells
List[dict]nrowsintncolsr   c                   t          j        ||ft                    }| D ]}|d         }|d         }|d         |k    s|d         |k    rbt          j        t          |d         dz   |          t          |d         dz   |          ft                    }||d|d|f<   |}|j        \  }}|d         ||d         |d         f<   t          ||          S )	z>convert table-transformer's cells data into a pandas dataframerm   row_numscolumn_numsr   r   Nz	cell text)columns)r   rr   r8   maxr-   r   )	r   r   r   headerarrcellrowscolsnew_arrs	            r"   table_cells_to_dataframer   S  s     (E5>
0
0
0C 2 2JM"7etAw%//hDGaK 7 7T!Wq[%9P9PQY_```G&)GFUFFUFN#C9LE5 $[ 1DGT!WS&))))r!   regionsr   List[TextRegions]c           	        t          |           dk    rg S | j                                                            t                    }| j        | j        z
  t          j        z  }| j	        | j
        z
  t          j        z  }|dddfxx         |z  cc<   |dddfxx         |z  cc<   |dddfxx         |z  cc<   |dddfxx         |z  cc<   t          |          }t          |          \  }}g }t          |          D ]F}|                    |                     t#          j        ||k              d                              G|S )zPartitions regions into groups of regions based on proximity. Returns list of lists of
    regions, each list corresponding with a groupr   Nr         )rq   r,   copyr1   r2   re   rc   r   ELEMENTS_V_PADDING_COEFrd   rb   ELEMENTS_H_PADDING_COEFr   r   rw   rQ   rL   r   where)	r   padded_coordsv_padh_padintersection_mtxgroup_count
group_numsrM   rZ   s	            r"   partition_groups_from_regionsr   h  ss    7||q	*//1188??MZ'*$(8(PPEZ'*$(8(PPE!!!Q$5 !!!Q$5 !!!Q$5 !!!Q$5 +M::23CDDK "F{## G GgmmBHZ5-@$A$A!$DEEFFFFMr!         ?coords1r%   coords2	thresholdc                   t          j        | dd          \  }}}}t          j        |dd          \  }}}	}
t          j        |t          j        |                    }t          j        |t          j        |                    }t          j        |t          j        |	                    }t          j        |t          j        |
                    }t          j        ||z
  d          t          j        ||z
  d          z  S )zHcompute intersection area and own areas for two groups of bounding boxesrl   r   axisr   )r   splitmaximum	transposeminimum)r   r   r   x11y11x12y12x21y21x22y22xayaxbybs                  r"   !intersection_areas_between_coordsr     s     '11555Cc3'11555Cc3	Cc**	+	+B	Cc**	+	+B	Cc**	+	+B	Cc**	+	+B:rBw##bj"r'A&>&>>>r!   ri   c                   t          |           dk     r| S t          j        | j                   }| j        |         }t          ||          }| j        |         }|t          j        |t                    z  |k    ||j        k    z  }t          |           }t          j	        |t                    }d}	|dk    r|	dz   }
t          j        ||	|
df                   d         |	z   dz   }|                                sn]d||<   |t          |          dz   z  }t          j        ||
d                   d         }t          |          sn|d         |
z   }	|dk    ||         }t          j        |dddf                   }| j        | j        d}dD ],}t          | |          x}||         |         |         ||<   -t!          d	d||         i|}|S )
gAfter this function, the list of elements will not contain any element inside
    of the type specifiedr   rm   r   r   N)r)   rF   r(   r&   r+   r,   r    )rq   r   argsortareasr,   r   r   EPSILON_AREAT	ones_liker:   r   sumr)   rF   r.   r   )ri   r   sorted_by_areasorted_coordsself_intersectionr  r   n_candidatesrG   current_candidateplus_oneremoveremaining_candidatesfinal_coordssorted_by_y1final_attrsr5   original_attrfinal_elementss                      r"   clean_layoutelementsr    s&    8}}qZ00N+N;M :-WWN>*E 	BJul;;;>QQ	%'	 x==L<T***D


$q(H+,=xyy,HIJJ1M  	 zz|| 	VFa'!xXYY88;'(( 	03h>% 

( !&L:l111a4011L !) =/# #K @ N N$Xt444M=).9$?MD#]]<3M]Q\]]Nr!   element_classc                j   t          j        | j                   }| j        |         }| j        |         |k    }|                                }|dk    s|t          |           k    r| S ||         }||          }t          ||          }	| j        |         |         }
|	t          j        |
t                    z  |k    |
|
j
        k    z  }|}t          j        |
t                    }d}|dk    r|dz   }t          j        |||df                   d         |z   dz   }|                                sn]d||<   |t          |          dz   z  }t          j        ||d                   d         }t          |          sn|d         |z   }|dk    ||         }t          ||          }| j        |         |          }|t          j        |t                    z  |k    |                    d          |
|         j
        k    z  }|                    d                              t                     }t          j        ||         ||         g          }d| j        i}dD ]G}t%          | |          x}t          j        ||         |         ||          |         f          ||<   Ht)          d
d	|i|}|S )r   r   rm   r   N)r   r   r)   r   r,   r    )r   r  r  r,   r(   r  rq   r   r   r  r  r  r:   r   reshaper1   vstackr)   r.   rS   r   )ri   r  r   r  r  target_indices
len_targettarget_coordsother_coordstarget_self_intersectiontarget_areasr   r
  rG   r  r  r  r  target_coords_to_keepother_to_target_intersectionother_areas#other_is_almost_subregion_of_targetrH   r  r  r5   r  r  s                               r"   clean_layoutelements_for_classr$    s    Z00N+N;M/?=PN  ##%%JQ*H55!.1M .1L  AP]^^>.1.AL 	!2:lL#I#IIL__		') L<D111D


$q(H+,=xyy,HIJJ1M  	 zz|| 	VFa'!xXYY88;'(( 	03h>% 

( *$/#D$ $ 
 .0.AK$rz+|'L'LLObb			W	%	%d);)=	=+?' 699q9AAHHNNNJ9mD1<
3KLMML#98;X"YK? 
 
$Xt444M=N>*40-2PQ[2\]
 
D $OO<O;OONr!   )r   r   r   r   r   r   r   r2   r   r2   r9   r   )r   r   r   r   )r   r   N)r   r   r   r   r   r   r9   r   )r   r   r9   r   )r   )r   r%   r   r%   r   r2   )ri   r   r   r2   )ri   r   r  r   r   r2   )-
__future__r   dataclassesr   r   typingr   r   r   r	   r
   numpyr   layoutparser.elements.layoutr   pandasr   scipy.sparse.csgraphr   unstructured_inference.configr    unstructured_inference.constantsr   r   r   )unstructured_inference.inference.elementsr   r   r   r   r   r   r   r  r   r`   LAYOUT_SAME_REGION_THRESHOLDLAYOUT_SUBREGION_THRESHOLDr   r   r   r   r   r  r$  r    r!   r"   <module>r1     s   " " " " " " ( ( ( ( ( ( ( ( < < < < < < < < < < < < < <     2 2 2 2 2 2       5 5 5 5 5 5 : : : : : :         
                   v
 v
 v
 v
 v
[ v
 v
 v
r *
 *
 *
 *
 *
J *
 *
 *
b $4#P!1!Lb b b b bJ%3 %3 %3 %3T 	* * * * **   4 ? ? ? ? ?"5 5 5 5 5v "%K K K K K K Kr!   