
    Ng                     Z   d dl mZmZmZmZmZ d dlmZmZ d dl	Z
d dlmZ d dlmZ ddlmZmZ 	 	 ddee         d	eeegef         d
eee         gef         dedee         f
dZ	 ddee         dededee         fdZ	 ddee         dedeee         eee                  f         fdZdS )    )ListUnionAnyCallableIterable)partialreduceN)
csr_matrix)connected_components   )BaseLayoutElement	TextBlocksequencescoring_funcaggregation_funcdefault_score_valuereturnc                     |d }t                     }t          j        ||f          |z  }t          |          D ]9}t          |dz   |          D ]#} | |          |                   ||         |<   $:t	          |          }t          |dd          \  }	}
g }t          |	          D ]J}t          j        |
|k              d         }|                     | fd|D                                  K|S )	a  Perform connected componenet analysis for any 1D sequence based on
    the scoring function and the aggregation function.
    It will generate the adjacency_matrix for the 1D sequence object using
    the provided `scoring_func` and find the connected componenets.
    The `aggregation_func` will be used to aggregate all elements within
    identified components (when not set, it will be the identity function).

    Args:
        sequence (List[Any]):
            The provided 1D sequence of objects.
        scoring_func (Callable[[Any, Any], int]):
            The scoring function used to construct the adjacency_matrix.
            It should take two objects in the sequence and produe a integer.
        aggregation_func (Callable[[List[Any]], Any], optional):
            The function used to aggregate the elements within an identified
            component.
            Defaults to the identify function: `lambda x: x`.
        default_score_value (int, optional):
            Used to set the default (background) score values that should be
            not considered when running connected component analysis.
            Defaults to 0.

    Returns:
        List[Any]: A list of length n - the number of the detected componenets.
    Nc                     | S N )xs    _/var/www/html/ai-engine/env/lib/python3.11/site-packages/layoutparser/tools/shape_operations.py<lambda>z=generalized_connected_component_analysis_1d.<locals>.<lambda>:   s    Q        FT)csgraphdirectedreturn_labelsr   c                      g | ]
}|         S r   r   ).0ir   s     r   
<listcomp>z?generalized_connected_component_analysis_1d.<locals>.<listcomp>K   s    1S1S1S!(1+1S1S1Sr   )lennponesranger
   r   whereappend)r   r   r   r   seq_lenadjacency_matrixr"   jgraphn_componentslabelsgrouped_sequencecomp_idxelement_idxs   `             r   +generalized_connected_component_analysis_1dr3      sE   @ &;(mmGw1225HH7^^ L Lq1ug&& 	L 	LA%1\(1+x{%K%KQ""	L '((E/T  L& ,'' V Vhv12215 0 01S1S1S1S{1S1S1S T TUUUUr   
   layoutx_tolerancey_tolerancec                 X     d }t           t          |||           fd          }|S )aF  Perform line detection based on connected component analysis.

    The is_line_wise_close is the scoring function, which returns True
    if the y-difference is smaller than the y_tolerance AND the
    x-difference (the horizontal gap between two boxes) is also smaller
    than the x_tolerance, and False otherwise.

    All the detected components will then be passed into aggregation_func,
    which returns the overall union box of all the elements, or the line
    box.

    Args:
        layout (Iterable):
            A list (or Layout) of BaseLayoutElement
        x_tolerance (int, optional):
            The value used for specifying the maximum allowed y-difference
            when considered whether two tokens are from the same line.
            Defaults to 10.
        y_tolerance (int, optional):
            The value used for specifying the maximum allowed horizontal gap
            when considered whether two tokens are from the same line.
            Defaults to 10.

    Returns:
        List[BaseLayoutElement]: A list of BaseLayoutElement, denoting the line boxes.
    c                 >   | j         j        d         }|j         j        d         }| j         j        dd d         \  }}|j         j        dd d         \  }}	t          ||z
            |k    o3t	          t          ||	z
            t          ||z
                      |k    S )Nr   r   r   )blockcentercoordinatesabsmin)
token_atoken_br6   r7   y_ay_ba_lefta_rightb_leftb_rights
             r   is_line_wise_closez1simple_line_detection.<locals>.is_line_wise_closen   s    m"1%m"1%!-3ADqD9!-3ADqD9 c	NNk) QC())3w/?+@+@AA[P	
r   )r7   r6   c                 D    t          d         j        j        |           S Nr   r	   	__class__unionseqr5   s    r   r   z'simple_line_detection.<locals>.<lambda>   s    VF1I,?,Es%K%K r   r   r   )r3   r   )r5   r6   r7   rG   detected_liness   `    r   simple_line_detectionrQ   P   s\    <

 

 

  AK[
 
 
 LKKK  N r   Tunion_groupc                 B     |r fd}nd}t           d |          }|S )a0  Group textblocks based on their category (block.type).

    Args:
        layout (Iterable):
            A list (or Layout) of BaseLayoutElement
        union_group (bool):
            Whether to union the boxes within each group.
            Defaults to True.

    Returns:
        List[TextBlock]: When `union_group=True`, it produces a list of
            TextBlocks, denoting the boundaries of each texblock group.
        List[List[TextBlock]]: When `union_group=False`, it preserves
            the elements within each group for further processing.
    c                 D    t          d         j        j        |           S rI   rJ   rM   s    r   r   z4group_textblocks_based_on_category.<locals>.<lambda>   s    vfQi.A.G'M'M r   Nc                 "    | j         |j         k    S r   )type)abs     r   r   z4group_textblocks_based_on_category.<locals>.<lambda>   s    !&AF"2 r   rO   )r3   )r5   rR   r   detected_group_boxess   `   r   "group_textblocks_based_on_categoryrZ      sN    &   MMMMF22)    r   rI   )r4   r4   )T)typingr   r   r   r   r   	functoolsr   r	   numpyr%   scipy.sparser
   scipy.sparse.csgraphr   elementsr   r   intr3   rQ   boolrZ   r   r   r   <module>rc      s   8 7 7 7 7 7 7 7 7 7 7 7 7 7 % % % % % % % %     # # # # # # 5 5 5 5 5 5 3 3 3 3 3 3 3 3 48 	4 43i4C:s?+4 S	{C/04 	4
 
#Y4 4 4 4p TV6 6&'6696MP6	
6 6 6 6t 6:   Y .2 
4	?Di112           r   