
    Ng_                        d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z%  ej&        e'          Z( G d d          Z) G d d          Z* G d d          Z+ G d d          Z, G d de+          Z- G d d e-          Z. G d! d"e.          Z/ G d# d$e.          Z0 G d% d&e-          Z1 G d' d(e+e,          Z2 G d) d*e-e,          Z3 ed+e+,          Z4 G d- d.e-ee4                   Z5 G d/ d0e5e4                   Z6 G d1 d2e6e4         e,          Z7ee3e2f         Z8 G d3 d4e7e8                   Z9 G d5 d6e9          Z: G d7 d8e9          Z; G d9 d:e7e9                   Z< G d; d<e<          Z= G d= d>e<          Z>ee<d?f         Z? G d@ d?e7e?                   Z@ G dA dBe@          ZA G dC dDe@          ZB G dE dFe5e-                   ZC G dG dHeC          ZD G dI dJeC          ZEdS )K    N)DictGenericIterableIteratorListOptionalSequenceSetTupleTypeVarUnioncast   )PDFColorSpace)PDFFont)Color)PDFGraphicState)	PDFStream)INFPathSegment)LTComponentT)Matrix)Plane)Point)Rectapply_matrix_pt)bbox2str)fsplit)	get_bound)
matrix2str)uniqc                   (    e Zd Zd	deddfdZd
dZdS )IndexAssignerr   indexreturnNc                     || _         d S Nr%   )selfr%   s     K/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfminer/layout.py__init__zIndexAssigner.__init__(   s    


    objLTItemc                     t          |t                    r| j        |_        | xj        dz  c_        d S t          |t                    r|D ]}|                     |           d S d S Nr   )
isinstance	LTTextBoxr%   LTTextGrouprun)r*   r.   xs      r+   r5   zIndexAssigner.run+   sv    c9%% 	
CIJJ!OJJJJ[)) 	  	 	 r-   r   )r.   r/   r&   N)__name__
__module____qualname__intr,   r5    r-   r+   r$   r$   '   sO         c $         r-   r$   c                   j    e Zd ZdZ	 	 	 	 	 	 	 ddededed	ed
ee         dededdfdZddZde	fdZ
dS )LAParamsa  Parameters for layout analysis

    :param line_overlap: If two characters have more overlap than this they
        are considered to be on the same line. The overlap is specified
        relative to the minimum height of both characters.
    :param char_margin: If two characters are closer together than this
        margin they are considered part of the same line. The margin is
        specified relative to the width of the character.
    :param word_margin: If two characters on the same line are further apart
        than this margin then they are considered to be two separate words, and
        an intermediate space will be added for readability. The margin is
        specified relative to the width of the character.
    :param line_margin: If two lines are are close together they are
        considered to be part of the same paragraph. The margin is
        specified relative to the height of a line.
    :param boxes_flow: Specifies how much a horizontal and vertical position
        of a text matters when determining the order of text boxes. The value
        should be within the range of -1.0 (only horizontal position
        matters) to +1.0 (only vertical position matters). You can also pass
        `None` to disable advanced layout analysis, and instead return text
        based on the position of the bottom left corner of the text box.
    :param detect_vertical: If vertical text should be considered during
        layout analysis
    :param all_texts: If layout analysis should be performed on text in
        figures.
          ?       @皙?Fline_overlapchar_marginline_marginword_margin
boxes_flowdetect_vertical	all_textsr&   Nc                     || _         || _        || _        || _        || _        || _        || _        |                                  d S r(   )rB   rC   rD   rE   rF   rG   rH   	_validate)r*   rB   rC   rD   rE   rF   rG   rH   s           r+   r,   zLAParams.__init__P   sP     )&&&$."r-   c                     | j         hd}t          | j         t                    s)t          | j         t                    st	          |          d| j         cxk    rdk    sn t          |          d S d S )Nz@LAParam boxes_flow should be None, or a number between -1 and +1r   )rF   r2   r;   float	TypeError
ValueError)r*   boxes_flow_err_msgs     r+   rJ   zLAParams._validated   s    ?&U  4?C0044>tPU4V4V4   2333----A---- !3444 '& .-r-   c                 <    d| j         | j        | j        | j        fz  S )NzM<LAParams: char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>)rC   rD   rE   rH   r*   s    r+   __repr__zLAParams.__repr__p   s)    -!143CT^TU	
r-   )r?   r@   r?   rA   r?   FFr&   N)r8   r9   r:   __doc__rM   r   boolr,   rJ   strrS   r<   r-   r+   r>   r>   4   s         : "   &) %   	
  UO   
   (
5 
5 
5 
5
# 
 
 
 
 
 
r-   r>   c                   "    e Zd ZdZdeddfdZdS )r/   z)Interface for things that can be analyzedlaparamsr&   Nc                     dS )zPerform the layout analysis.Nr<   r*   rY   s     r+   analyzezLTItem.analyze{   s    r-   )r8   r9   r:   rU   r>   r\   r<   r-   r+   r/   r/   x   s<        33 T      r-   r/   c                   *    e Zd ZdZdefdZdefdZdS )LTTextz#Interface for things that have textr&   c                 L    d| j         j        d|                                 dS N< >)	__class__r8   get_textrR   s    r+   rS   zLTText.__repr__   s&      N333T]]____EEr-   c                     t           )zText contained in this objectNotImplementedErrorrR   s    r+   re   zLTText.get_text   s    !!r-   N)r8   r9   r:   rU   rW   rS   re   r<   r-   r+   r^   r^      sU        --F# F F F F"# " " " " " "r-   r^   c                       e Zd ZdZdeddfdZdefdZdede	fdZ
dede	fd	Zdede	fd
Zdede	fdZdeddfdZde	fdZdd de	fdZdd defdZdd defdZdd de	fdZdd defdZdd defdZdS )LTComponentzObject with a bounding boxbboxr&   Nc                 d    t                               |            |                     |           d S r(   )r/   r,   set_bboxr*   rk   s     r+   r,   zLTComponent.__init__   s,    dr-   c                 L    d| j         j        dt          | j                  dS r`   )rd   r8   r   rk   rR   s    r+   rS   zLTComponent.__repr__   s*      N333Xdi5H5H5H5HIIr-   _c                     t           r(   rO   r*   rp   s     r+   __lt__zLTComponent.__lt__       r-   c                     t           r(   rr   rs   s     r+   __le__zLTComponent.__le__   ru   r-   c                     t           r(   rr   rs   s     r+   __gt__zLTComponent.__gt__   ru   r-   c                     t           r(   rr   rs   s     r+   __ge__zLTComponent.__ge__   ru   r-   c                     |\  }}}}|| _         || _        || _        || _        ||z
  | _        ||z
  | _        || _        d S r(   )x0y0x1y1widthheightrk   )r*   rk   r}   r~   r   r   s         r+   rm   zLTComponent.set_bbox   sM    RR"W
2g			r-   c                 .    | j         dk    p
| j        dk    S Nr   )r   r   rR   s    r+   is_emptyzLTComponent.is_empty   s    zQ2$+"22r-   r.   c                     t          |t                    s$J t          t          |                                |j        | j        k    o| j        |j        k    S r(   )r2   rj   rW   typer}   r   r*   r.   s     r+   is_hoverlapzLTComponent.is_hoverlap   I    #{++;;Sc^^;;;v 6TW%66r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rdS t          t          | j        |j        z
            t          | j        |j        z
                      S r   	r2   rj   rW   r   r   minabsr}   r   r   s     r+   	hdistancezLTComponent.hdistance   z    #{++;;Sc^^;;;C   	E1s47SV+,,c$'CF2B.C.CDDDr-   c                 *   t          |t                    s$J t          t          |                                |                     |          rDt          t          | j        |j        z
            t          | j        |j        z
                      S dS r   r   r   s     r+   hoverlapzLTComponent.hoverlap   y    #{++;;Sc^^;;;C   	s47SV+,,c$'CF2B.C.CDDD1r-   c                     t          |t                    s$J t          t          |                                |j        | j        k    o| j        |j        k    S r(   )r2   rj   rW   r   r~   r   r   s     r+   is_voverlapzLTComponent.is_voverlap   r   r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rdS t          t          | j        |j        z
            t          | j        |j        z
                      S r   	r2   rj   rW   r   r   r   r   r~   r   r   s     r+   	vdistancezLTComponent.vdistance   r   r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rDt          t          | j        |j        z
            t          | j        |j        z
                      S dS r   r   r   s     r+   voverlapzLTComponent.voverlap   r   r-   )r8   r9   r:   rU   r   r,   rW   rS   objectrV   rt   rw   ry   r{   rm   r   r   rM   r   r   r   r   r   r<   r-   r+   rj   rj      s       $$T d    J# J J J J 4     4     4     4    T d    3$ 3 3 3 37} 7 7 7 7 7E] Eu E E E EM e    7} 7 7 7 7 7E] Eu E E E EM e      r-   rj   c                       e Zd ZdZ	 	 	 	 	 	 	 ddedee         dededed	ee	         d
ee	         deee
                  deeeef                  ddfdZdefdZdS )LTCurvez
    A generic Bezier curve

    The parameter `original_path` contains the original
    pathing information from the pdf (e.g. for reconstructing Bezier Curves).

    `dashing_style` contains the Dashing information if any.
    FN	linewidthptsstrokefillevenoddstroking_colornon_stroking_colororiginal_pathdashing_styler&   c
                     t                               | t          |                     || _        || _        || _        || _        || _        || _        || _	        || _
        |	| _        d S r(   )rj   r,   r    r   r   r   r   r   r   r   r   r   )
r*   r   r   r   r   r   r   r   r   r   s
             r+   r,   zLTCurve.__init__   si     	T9S>>222"	,"4**r-   c                 J    d                     d | j        D                       S )N,c              3       K   | ]	}d |z  V  
dS )z	%.3f,%.3fNr<   ).0ps     r+   	<genexpr>z"LTCurve.get_pts.<locals>.<genexpr>   s&      ::Aa::::::r-   )joinr   rR   s    r+   get_ptszLTCurve.get_pts   s%    xx::::::::r-   FFFNNNN)r8   r9   r:   rU   rM   r   r   rV   r   r   r   r   r   r,   rW   r   r<   r-   r+   r   r      s          *..2599=+ ++ %[+ 	+
 + + !+ %UO+  [ 12+  ffn 56+ 
+ + + +.; ; ; ; ; ; ;r-   r   c                       e Zd ZdZ	 	 	 	 	 	 	 ddededededed	ed
ee         dee         dee	e
                  deeeef                  ddfdZdS )LTLinezOA single straight line.

    Could be used for separating text or figures.
    FNr   p0p1r   r   r   r   r   r   r   r&   c                 P    t                               | |||g||||||	|

  
         d S r(   r   r,   )r*   r   r   r   r   r   r   r   r   r   r   s              r+   r,   zLTLine.__init__   sH     	H	
 	
 	
 	
 	
r-   r   )r8   r9   r:   rU   rM   r   rV   r   r   r   r   r   r   r,   r<   r-   r+   r   r      s          *..2599=
 

 
 	

 
 
 
 !
 %UO
  [ 12
  ffn 56
 

 
 
 
 
 
r-   r   c                       e Zd ZdZ	 	 	 	 	 	 	 ddededededed	ee         d
ee         dee	e
                  deeeef                  ddfdZdS )LTRectzMA rectangle.

    Could be used for framing another pictures or figures.
    FNr   rk   r   r   r   r   r   r   r   r&   c
                 r    |\  }
}}}t                               | ||
|f||f||f|
|fg|||||||	
  
         d S r(   r   )r*   r   rk   r   r   r   r   r   r   r   r}   r~   r   r   s                 r+   r,   zLTRect.__init__   sk      RR"XBx"bB84	
 	
 	
 	
 	
r-   r   )r8   r9   r:   rU   rM   r   rV   r   r   r   r   r   r   r,   r<   r-   r+   r   r     s          *..2599=
 

 
 	

 
 
 !
 %UO
  [ 12
  ffn 56
 

 
 
 
 
 
r-   r   c                   6    e Zd ZdZdedededdfdZdefdZdS )	LTImagezKAn image object.

    Embedded images can be in JPEG, Bitmap or JBIG2.
    namestreamrk   r&   Nc                    t                               | |           || _        || _        |                    d          |                    d          f| _        |                    d          | _        |                    dd          | _        |                    d          | _        t          | j        t                    s| j        g| _        d S d S )N)WWidth)HHeight)IM	ImageMask)BPCBitsPerComponentr   )CS
ColorSpace)rj   r,   r   r   get_anysrcsize	imagemaskbits
colorspacer2   list)r*   r   r   rk   s       r+   r,   zLTImage.__init__A  s    T4(((	~668W8WX(;<<NN#>BB	 ..)=>>$/400 	0#/DOOO	0 	0r-   c           	      l    d| j         j        d| j        dt          | j                  d| j        d	S Nra   () rb   rc   )rd   r8   r   r   rk   r   rR   s    r+   rS   zLTImage.__repr__L  sA     N###IIITYLLL	
 	
r-   )	r8   r9   r:   rU   rW   r   r   r,   rS   r<   r-   r+   r   r   ;  si         
	0S 	0) 	04 	0D 	0 	0 	0 	0
# 
 
 
 
 
 
r-   r   c                   .    e Zd ZdZdeddfdZdefdZdS )LTAnnoa  Actual letter in the text as a Unicode string.

    Note that, while a LTChar object has actual boundaries, LTAnno objects does
    not, as these are "virtual" characters, inserted by a layout analyzer
    according to the relationship between two characters (e.g. a space).
    textr&   Nc                     || _         d S r(   _text)r*   r   s     r+   r,   zLTAnno.__init__]  s    
r-   c                     | j         S r(   r   rR   s    r+   re   zLTAnno.get_texta  
    zr-   )r8   r9   r:   rU   rW   r,   re   r<   r-   r+   r   r   U  s[         S T    #      r-   r   c                       e Zd ZdZdededededededed	eee	e
e         ef         f         d
ededdfdZdefdZdefdZdedefdZdS )LTCharz.Actual letter in the text as a Unicode string.matrixfontfontsizescalingriser   	textwidthtextdispncsgraphicstater&   Nc                 <   t                               |            || _        || _        |j        | _        |	| _        |
| _        ||z  |z  | _        |                                rSt          |t                    sJ |\  }}||dz  }n||z  dz  }d|z
  |z  dz  }| ||z   | j        z   f}| |z   ||z   f}n-|                                |z  }d||z   f}| j        ||z   |z   f}| j        \  }}}}}}d||z  |z  k     o||z  dk    | _        t          | j        |          \  }}t          | j        |          \  }}||k     r||}}||k     r||}}t                              | ||||f           |                                r| j        | _        n| j        | _        d S )Nr?   gMbP?i  r   )r^   r,   r   r   fontnamer   r   advis_verticalr2   tupleget_descentuprightr   rj   r   sizer   )r*   r   r   r   r   r   r   r   r   r   r   vxvybbox_lower_leftbbox_upper_rightdescentabcdefr}   r~   r   r   s                             r+   r,   zLTChar.__init__h  s    	
(x''1 	Eh.....HRz^(]U*)x'%/B "sBI$89O!#hT	: &&((83G 'D.1O $'D.8*CD![Aq!Q1q57?*9q1uz"4;@@R"4;0@AAR77BR77BRTBB#3444 	$
DIIDIr-   c                     d| j         j        dt          | j                  dt	          | j                  d| j        d| j        d|                                 dS )Nra   rb    matrix=z font=z adv=z text=rc   )	rd   r8   r   rk   r!   r   r   r   re   rR   s    r+   rS   zLTChar.__repr__  sc     N###TYt{####MMMHHHMMOOOO
 	
r-   c                     | j         S r(   r   rR   s    r+   re   zLTChar.get_text  r   r-   r.   c                     dS )z<Returns True if two characters can coexist in the same line.Tr<   r   s     r+   is_compatiblezLTChar.is_compatible  s    tr-   )r8   r9   r:   rU   r   r   rM   rW   r   r   r   r   r   r,   rS   re   r   rV   r   r<   r-   r+   r   r   e  s       8822 2 	2
 2 2 2 2 uXe_e%;<<=2 2 &2 
2 2 2 2h
# 
 
 
 
#     D      r-   r   LTItemT)boundc                       e Zd ZdZdeddfdZdee         fdZde	fdZ
deddfd	Zd
ee         ddfdZdeddfdZdS )LTContainerz(Object that can be extended and analyzedrk   r&   Nc                 J    t                               | |           g | _        d S r(   )rj   r,   _objsrn   s     r+   r,   zLTContainer.__init__  s$    T4((($&
r-   c                 *    t          | j                  S r(   )iterr  rR   s    r+   __iter__zLTContainer.__iter__  s    DJr-   c                 *    t          | j                  S r(   )lenr  rR   s    r+   __len__zLTContainer.__len__  s    4:r-   r.   c                 :    | j                             |           d S r(   )r  appendr   s     r+   addzLTContainer.add  s    
#r-   objsc                 :    |D ]}|                      |           d S r(   )r  )r*   r  r.   s      r+   extendzLTContainer.extend  s'     	 	CHHSMMMMr-   rY   c                 D    | j         D ]}|                    |           d S r(   )r  r\   r*   rY   r.   s      r+   r\   zLTContainer.analyze  s-    : 	" 	"CKK!!!!r-   )r8   r9   r:   rU   r   r,   r   r   r  r;   r	  r  r   r  r>   r\   r<   r-   r+   r  r    s        22T d    
 (7+            w 4    8G,     
 T      r-   r  c                   &    e Zd ZddZdeddfdZdS )LTExpandableContainerr&   Nc                 t    t                               | t          
 t          
 t           t           f           d S r(   )r  r,   r   rR   s    r+   r,   zLTExpandableContainer.__init__  s.    TSD3$sd#;<<<r-   r.   c           	      T   t                               | t          t          |                     |                     t          | j        |j                  t          | j        |j                  t          | j	        |j	                  t          | j
        |j
                  f           d S r(   )r  r  r   r   rm   r   r}   r~   maxr   r   r   s     r+   r  zLTExpandableContainer.add  s    d7C00111DGSV$$DGSV$$DGSV$$DGSV$$		
 	
 	
 	r-   rT   )r8   r9   r:   r,   rj   r  r<   r-   r+   r  r    sJ           
{ 
t 
 
 
 
 
 
r-   r  c                   "    e Zd ZddZdefdZdS )LTTextContainerr&   Nc                 n    t                               |            t                              |            d S r(   )r^   r,   r  rR   s    r+   r,   zLTTextContainer.__init__  s-    &&t,,,r-   c                 @    d                     d | D                       S )N c              3      K   | ]@}t          |t                    t          t          |                                          V  Ad S r(   )r2   r^   r   re   )r   r.   s     r+   r   z+LTTextContainer.get_text.<locals>.<genexpr>  s[       
 
-0JsF<S<S
&&((
 
 
 
 
 
r-   )r   rR   s    r+   re   zLTTextContainer.get_text  s4    ww 
 
48
 
 
 
 
 	
r-   rT   )r8   r9   r:   r,   rW   re   r<   r-   r+   r  r    sC           

# 
 
 
 
 
 
r-   r  c                        e Zd ZdZdeddf fdZdefdZdeddfdZ	d	e
e         d
eded          fdZdef fdZ xZS )
LTTextLinezContains a list of LTChar objects that represent a single text line.

    The characters are aligned either horizontally or vertically, depending on
    the text's writing mode.
    rE   r&   Nc                 V    t                                                       || _        d S r(   )superr,   rE   )r*   rE   rd   s     r+   r,   zLTTextLine.__init__  s&    &r-   c                 v    d| j         j        dt          | j                  d|                                 dS r`   )rd   r8   r   rk   re   rR   s    r+   rS   zLTTextLine.__repr__  s>     N###TYMMOOOO
 	
r-   rY   c                     | j         D ]}|                    |           t                              | t	          d                     d S )N
)r  r\   r  r  r   r  s      r+   r\   zLTTextLine.analyze  sG    : 	" 	"CKK!!!!fTll+++r-   planeratioc                     t           r(   rg   )r*   r$  r%  s      r+   find_neighborszLTTextLine.find_neighbors
  s
     "!r-   c                     t                                                      p%|                                                                 S r(   )r   r   re   isspace)r*   rd   s    r+   r   zLTTextLine.is_empty  s2    ww!!>T]]__%<%<%>%>>r-   )r8   r9   r:   rU   rM   r,   rW   rS   r>   r\   r   r   r   r'  rV   r   __classcell__rd   s   @r+   r  r    s         E d      

# 
 
 
 
 T    "<("16"	l	" " " "
?$ ? ? ? ? ? ? ? ? ? ?r-   r  c                        e Zd ZdeddfdZdeddf fdZdee         dede	e
         fd	ZddededefdZddededefdZ	 ddededefdZddededefdZ xZS )LTTextLineHorizontalrE   r&   Nc                 V    t                               | |           t          
 | _        d S r(   )r  r,   r   _x1r*   rE   s     r+   r,   zLTTextLineHorizontal.__init__  &    D+...$r-   r.   c                 T   t          |t                    rd| j        r]| j        t          |j        |j                  z  }| j        |j        |z
  k     r(t          	                    | t          d                     |j        | _        t                      	                    |           d S Nrb   )r2   r   rE   r  r   r   r/  r}   r  r  r   r   r   r*   r.   marginrd   s      r+   r  zLTTextLineHorizontal.add  s    c6"" 	3t'7 	3%CIsz(B(BBFx#&6/))fSkk2226Cr-   r$  r%  c                      | j         z  |                     j         j        z
   j         j        z   f          } fd|D             S )aK  
        Finds neighboring LTTextLineHorizontals in the plane.

        Returns a list of other LTTestLineHorizontals in the plane which are
        close to self. "Close" can be controlled by ratio. The returned objects
        will be the same height as self, and also either left-, right-, or
        centrally-aligned.
        c                     g | ]u}t          |t                                        |           .                    |           s.                    |           s                    |           s|vS )	tolerance)r2   r-  _is_same_height_as_is_left_aligned_with_is_right_aligned_with_is_centrally_aligned_withr   r.   r   r*   s     r+   
<listcomp>z7LTTextLineHorizontal.find_neighbors.<locals>.<listcomp>1  s     
 
 
3 455	

 ++C1+==
 ..sa.@@
 223!2DD
 66sa6HH

 
 
r-   )r   findr}   r~   r   r   r*   r$  r%  r  r   s   `   @r+   r'  z#LTTextLineHorizontal.find_neighbors$  sl     DKzz47DGaK$'A+FGG
 
 
 
 

 
 
 	
r-   r   otherr9  c                 B    t          |j        | j        z
            |k    S )zN
        Whether the left-hand edge of `other` is within `tolerance`.
        )r   r}   r*   rB  r9  s      r+   r;  z*LTTextLineHorizontal._is_left_aligned_with?        58dg%&&)33r-   c                 B    t          |j        | j        z
            |k    S )zO
        Whether the right-hand edge of `other` is within `tolerance`.
        )r   r   rD  s      r+   r<  z+LTTextLineHorizontal._is_right_aligned_withE  rE  r-   c                 n    t          |j        |j        z   dz  | j        | j        z   dz  z
            |k    S )zQ
        Whether the horizontal center of `other` is within `tolerance`.
           )r   r}   r   rD  s      r+   r=  z/LTTextLineHorizontal._is_centrally_aligned_withK  8     EHux'1,$'0AQ/FFGG9TTr-   c                 B    t          |j        | j        z
            |k    S r(   )r   r   rD  s      r+   r:  z'LTTextLineHorizontal._is_same_height_asS  s    5<$+-..);;r-   r7   )r8   r9   r:   rM   r,   rj   r  r   r   r   r  r'  rV   r;  r<  r=  r:  r*  r+  s   @r+   r-  r-    sm       E d    { t      
<(
16
	j	
 
 
 
64 4; 45 4QU 4 4 4 44 4K 4E 4RV 4 4 4 4 67U U U-2U	U U U U< < < <d < < < < < < < <r-   r-  c                        e Zd ZdeddfdZdeddf fdZdee         dede	e
         fd	ZddededefdZddededefdZ	 ddededefdZdededefdZ xZS )LTTextLineVerticalrE   r&   Nc                 V    t                               | |           t           | _        d S r(   )r  r,   r   _y0r0  s     r+   r,   zLTTextLineVertical.__init__X  r1  r-   r.   c                 T   t          |t                    rd| j        r]| j        t          |j        |j                  z  }|j        |z   | j        k     r(t          	                    | t          d                     |j        | _        t                      	                    |           d S r3  )r2   r   rE   r  r   r   r   rN  r  r  r   r~   r   r4  s      r+   r  zLTTextLineVertical.add_  s    c6"" 	3t'7 	3%CIsz(B(BBFv))fSkk2226Cr-   r$  r%  c                      | j         z  |                     j        z
   j         j        z    j        f          } fd|D             S )aG  
        Finds neighboring LTTextLineVerticals in the plane.

        Returns a list of other LTTextLineVerticals in the plane which are
        close to self. "Close" can be controlled by ratio. The returned objects
        will be the same width as self, and also either upper-, lower-, or
        centrally-aligned.
        c                     g | ]u}t          |t                                        |           .                    |           s.                    |           s                    |           s|vS r8  )r2   rL  _is_same_width_as_is_lower_aligned_with_is_upper_aligned_withr=  r>  s     r+   r?  z5LTTextLineVertical.find_neighbors.<locals>.<listcomp>u  s     
 
 
3 233	

 **3!*<<
 //q/AA
 223!2DD
 66sa6HH

 
 
r-   )r   r@  r}   r~   r   r   rA  s   `   @r+   r'  z!LTTextLineVertical.find_neighborsh  sl     DJzz47Q;1dgFGG
 
 
 
 

 
 
 	
r-   r   rB  r9  c                 B    t          |j        | j        z
            |k    S )zJ
        Whether the lower edge of `other` is within `tolerance`.
        )r   r~   rD  s      r+   rS  z)LTTextLineVertical._is_lower_aligned_with  rE  r-   c                 B    t          |j        | j        z
            |k    S )zJ
        Whether the upper edge of `other` is within `tolerance`.
        )r   r   rD  s      r+   rT  z)LTTextLineVertical._is_upper_aligned_with  rE  r-   c                 n    t          |j        |j        z   dz  | j        | j        z   dz  z
            |k    S )zO
        Whether the vertical center of `other` is within `tolerance`.
        rH  )r   r~   r   rD  s      r+   r=  z-LTTextLineVertical._is_centrally_aligned_with  rI  r-   c                 B    t          |j        | j        z
            |k    S r(   )r   r   rD  s      r+   rR  z$LTTextLineVertical._is_same_width_as  s    5;+,,	99r-   r7   )r8   r9   r:   rM   r,   rj   r  r   r   r   r  r'  rV   rS  rT  r=  rR  r*  r+  s   @r+   rL  rL  W  sh       E d    { t      
<(
16
	j	
 
 
 
64 4K 4E 4RV 4 4 4 44 4K 4E 4RV 4 4 4 4 67U U U-2U	U U U U:{ :u : : : : : : : : :r-   rL  c                   2    e Zd ZdZddZdefdZdefdZdS )r3   zRepresents a group of text chunks in a rectangular area.

    Note that this box is created by geometric analysis and does not
    necessarily represents a logical boundary of the text. It contains a list
    of LTTextLine objects.
    r&   Nc                 H    t                               |            d| _        d S )NrL   )r  r,   r%   rR   s    r+   r,   zLTTextBox.__init__  s"      &&&
r-   c           	          d| j         j        d| j        dt          | j                  d|                                 d	S r   )rd   r8   r%   r   rk   re   rR   s    r+   rS   zLTTextBox.__repr__  sG     N###JJJTYMMOOOO	
 	
r-   c                     t           r(   rg   rR   s    r+   get_writing_modezLTTextBox.get_writing_mode  s    !!r-   rT   )r8   r9   r:   rU   r,   rW   rS   r]  r<   r-   r+   r3   r3     sh            

# 
 
 
 
"# " " " " " "r-   r3   c                   4     e Zd Zdeddf fdZdefdZ xZS )LTTextBoxHorizontalrY   r&   Nc                     t                                          |           | j                            d            d S )Nc                     | j          S r(   )r   r.   s    r+   <lambda>z-LTTextBoxHorizontal.analyze.<locals>.<lambda>  
     r-   keyr   r\   r  sortr*   rY   rd   s     r+   r\   zLTTextBoxHorizontal.analyze  8    !!!
//000r-   c                     dS )Nzlr-tbr<   rR   s    r+   r]  z$LTTextBoxHorizontal.get_writing_mode      wr-   r8   r9   r:   r>   r\   rW   r]  r*  r+  s   @r+   r_  r_    f         T      
#        r-   r_  c                   4     e Zd Zdeddf fdZdefdZ xZS )LTTextBoxVerticalrY   r&   Nc                     t                                          |           | j                            d            d S )Nc                     | j          S r(   )r   rb  s    r+   rc  z+LTTextBoxVertical.analyze.<locals>.<lambda>  rd  r-   re  rg  ri  s     r+   r\   zLTTextBoxVertical.analyze  rj  r-   c                     dS )Nztb-rlr<   rR   s    r+   r]  z"LTTextBoxVertical.get_writing_mode  rl  r-   rm  r+  s   @r+   rp  rp    rn  r-   rp  r4   c                   4     e Zd Zdee         ddf fdZ xZS )r4   r  r&   Nc                 r    t                                                       |                     |           d S r(   )r   r,   r  )r*   r  rd   s     r+   r,   zLTTextGroup.__init__  s0    Dr-   )r8   r9   r:   r   TextGroupElementr,   r*  r+  s   @r+   r4   r4     sQ        X&67 D          r-   c                   (     e Zd Zdeddf fdZ xZS )LTTextGroupLRTBrY   r&   Nc                     t                                          |           |j        J |j        | j                            fd           d S )Nc                 J    dz
  | j         z  dz   | j        | j        z   z  z
  S r1   )r}   r~   r   r.   rF   s    r+   rc  z)LTTextGroupLRTB.analyze.<locals>.<lambda>  s+    Q^sv5:~#&36/23 r-   re  r   r\   rF   r  rh  r*   rY   rF   rd   s     @r+   r\   zLTTextGroupLRTB.analyze  si    !!!"...(

3 3 3 3 	 	
 	
 	
 	r-   r8   r9   r:   r>   r\   r*  r+  s   @r+   rx  rx    K        	 	T 	 	 	 	 	 	 	 	 	 	r-   rx  c                   (     e Zd Zdeddf fdZ xZS )LTTextGroupTBRLrY   r&   Nc                     t                                          |           |j        J |j        | j                            fd           d S )Nc                 L    dz    | j         | j        z   z  dz
  | j        z  z
  S r1   )r}   r   r   r{  s    r+   rc  z)LTTextGroupTBRL.analyze.<locals>.<lambda>  s.    a*n-#&A:~'( r-   re  r|  r}  s     @r+   r\   zLTTextGroupTBRL.analyze  si    !!!"...(

( ( ( ( 	 	
 	
 	
 	r-   r~  r+  s   @r+   r  r    r  r-   r  c                       e Zd ZdeddfdZdedee         dee	         fdZ
dedee	         dee         fd	Zded
ee         dee         fdZdeddfdZdS )LTLayoutContainerrk   r&   Nc                 J    t                               | |           d | _        d S r(   )r  r,   groupsrn   s     r+   r,   zLTLayoutContainer.__init__  s$    T4(((37r-   rY   r  c              #     K   d }d }|D ]I}|A|                     |          o|                    |          oqt          |j        |j                  |j        z  |                    |          k     o8|                    |          t          |j        |j                  |j	        z  k     }|j
        o|                     |          o|                    |          oqt          |j        |j                  |j        z  |                    |          k     o8|                    |          t          |j        |j                  |j	        z  k     }|rt          |t                    s|r+t          |t                     r|                    |           n||V  d }n|rA|s?t!          |j                  }|                    |           |                    |           nr|rA|s?t          |j                  }|                    |           |                    |           n/t          |j                  }|                    |           |V  d }|}K|-t          |j                  }|J |                    |           |V  d S r(   )r   r   r   r   rB   r   r   r  r   rC   rG   r   r   r   r2   r-  rL  r  rE   )r*   rY   r  obj0lineobj1halignvaligns           r+   group_objectszLTLayoutContainer.group_objects  s       C	 C	D &&t,, I((..IDK558MMmmD))*I t,,$*dj11H4HHI 0 , K**400K((..K DJ
33h6KKmmD))*K
 t,,$+t{33h6JJK   $z$0DEE $$)$0BCC$ HHTNNNN%JJJDD $f $1(2FGG $ $3H4HII3H4HII"


#DD<'(<==D###HHTNNN


r-   linesc              #     K   t          | j                  }|                    |           i }|D ]}|                    ||j                  }|g}|D ]C}|                    |           ||v r(|                    |                    |                     Dt          |t                    rt                      }	nt                      }	t          |          D ]}
|	                    |
           |	||
<   t                      }|D ]A}||vr||         }	|	|v r|                    |	           |	                                s|	V  BdS )z$Group neighboring lines to textboxesN)r   rk   r  r'  rD   r  popr2   r-  r_  rp  r"   r  setr   )r*   rY   r  r$  boxesr  	neighborsmembersr  boxr.   dones               r+   group_textlinesz!LTLayoutContainer.group_textlinesC  sm      $)#3#3U-/ 	! 	!D++E83GHHIfG! 4 4t$$$5==NN599T??333$ 455 *!4!6!6'))G}} ! ! c

! uu 	 	D5  +Cd{{HHSMMM<<>> 			r-   r  c           
      \   t           t          t          f         }t          | j                  dt
          dt
          dt          fd}d|d|dt          |         ffd}g }t          t          |                    D ]t}||         }t          |dz   t          |                    D ]I}	||	         }
|
                    d |||
          t          |          t          |
          ||
f           Jut          j        |                               |           t                      }t          |          dk    rXt          j        |          \  }}}}}}||vr$||vr|s( |||          rt          j        |d	|||||f           ct%          |t&          t(          f          st%          |t&          t(          f          rt)          ||g          }nt+          ||g          }                    |                               |           |                    ||g           D ]A}t          j        |d |||          t          |          t          |          ||f           B                    |           t          |          dk    Xt3          d
 D                       S )ax  Group textboxes hierarchically.

        Get pair-wise distances, via dist func defined below, and then merge
        from the closest textbox pair. Once obj1 and obj2 are merged /
        grouped, the resulting group is considered as a new object, and its
        distances to other objects & groups are added to the process queue.

        For performance reason, pair-wise distances and object pair info are
        maintained in a heap of (idx, dist, id(obj1), id(obj2), obj1, obj2)
        tuples. It ensures quick access to the smallest element. Note that
        since comparison operators, e.g., __lt__, are disabled for
        LTComponent, id(obj) has to appear before obj in element tuples.

        :param laparams: LAParams object.
        :param boxes: All textbox objects to be grouped.
        :return: a list that has only one element, the final top level group.
        r  obj2r&   c                 (   t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }||z
  ||z
  z  | j        | j        z  z
  |j        |j        z  z
  S )a  A distance function between two TextBoxes.

            Consider the bounding rectangle for obj1 and obj2.
            Return its area less the areas of obj1 and obj2,
            shown as 'www' below. This value may be negative.
                    +------+..........+ (x1, y1)
                    | obj1 |wwwwwwwwww:
                    +------+www+------+
                    :wwwwwwwwww| obj2 |
            (x0, y0) +..........+------+
            )r   r}   r~   r  r   r   r   r   )r  r  r}   r~   r   r   s         r+   distz/LTLayoutContainer.group_textboxes.<locals>.dist|  s     TWdg&&BTWdg&&BTWdg&&BTWdg&&BbR"W%*t{*+*t{*+r-   c                 N   t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t                              ||||f                    }|                    | |f          S )z8Check if there's any other object between obj1 and obj2.)	r   r}   r~   r  r   r   r  r@  
difference)r  r  r}   r~   r   r   r  r$  s          r+   isanyz0LTLayoutContainer.group_textboxes.<locals>.isany  s    TWdg&&BTWdg&&BTWdg&&BTWdg&&Buzz2r2r"23344D??D$<000r-   r   Fr   Tc              3   @   K   | ]}t          t          |          V  d S r(   )r   r4   )r   gs     r+   r   z4LTLayoutContainer.group_textboxes.<locals>.<genexpr>  s,      88QDa((888888r-   )r   r3   r4   r   rk   rj   rM   r
   ranger  r  idheapqheapifyr  r  heappopheappushr2   rp  r  rx  removeupdater  r   )r*   rY   r  ElementTr  r  distsibox1jbox2r  
skip_isanyr   id1id2r  r  grouprB  r$  s                       @r+   group_textboxesz!LTLayoutContainer.group_textboxesd  s   * K/0!&ty!1!1	{ 	+ 	% 	 	 	 	,	1 	1 	1S] 	1 	1 	1 	1 	1 	1 IKs5zz"" 	X 	XA8D1q5#e**-- X XQxeTT$%5%5r$xxD4QUVWWWWX 	eUuu%jj1nn49M%4H4H1ZCdD4coo! eeD$&7&7 N54CdD*IJJJd%6$HII :Z,o>N N : *9$)F)FEE+T4L99ET"""T"""S#J'''"  ENUE 2 2BuIIr%yy%QVW    		%   - %jj1nn0 88%888888r-   c                    t          d |           \  }}|D ]}|                    |           |sd S t          |                     ||                    }t          d |          \  }}|D ]}|                    |           t          |                     ||                    }|j        [|D ]}|                    |           dt          dt          t          t          t          f         fd}	|
                    |	           nt|                     ||          | _        t                      }
| j        D ],}|                    |           |
                    |           -|
                    d            t          t           t"                   |          |z   t          t           t"                   |          z   | _        d S )Nc                 ,    t          | t                    S r(   )r2   r   rb  s    r+   rc  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    :c63J3J r-   c                 *    |                                  S r(   )r   rb  s    r+   rc  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    #,,.. r-   r  r&   c                 n    t          | t                    rd| j         | j         fS d| j         | j        fS )Nr   r   )r2   rp  r   r~   r}   r  s    r+   getkeyz)LTLayoutContainer.analyze.<locals>.getkey  s=    c#455 0w00w//r-   re  c                     | j         S r(   r)   r  s    r+   rc  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    39 r-   )r   r\   r   r  r  rF   r3   r   r;   rM   rh  r  r  r$   r5   r   r   rj   r  )r*   rY   textobjs	otherobjsr.   	textlinesempties	textboxestextboxr  assignerr  s               r+   r\   zLTLayoutContainer.analyze  s    !''J'JD Q Q9 	" 	"CKK!!!! 	F++Hh??@@	%&@&@)LL) 	" 	"CKK!!!!--h	BBCC	&$ * *))))0I 0%UE0A*B 0 0 0 0 NNvN&&&&..xCCDK$H $ $h'''U####NN44N555k"I..4$g../ 	

 	r-   )r8   r9   r:   r   r,   r>   r   rj   r   r  r  r3   r  r	   r   r4   r  r\   r<   r-   r+   r  r    s       T d    N N(0(=N	*	N N N N` )1*)=	)	   BY9 Y9)1))<Y9	k	Y9 Y9 Y9 Y9v$ $T $ $ $ $ $ $r-   r  c                   F    e Zd ZdZdedededdfdZdefdZd	e	ddfd
Z
dS )LTFigurezRepresents an area used by PDF Form objects.

    PDF Forms can be used to present figures or pictures by embedding yet
    another PDF document within a page. Note that LTFigure objects can appear
    recursively.
    r   rk   r   r&   Nc                     || _         | _        |\  }}}}||f||z   |f|||z   f||z   ||z   ff}t          fd|D                       }t                              | |           d S )Nc              3   B   K   | ]\  }}t          ||f          V  d S r(   r   )r   r   qr   s      r+   r   z$LTFigure.__init__.<locals>.<genexpr>  s5      NNVa!Q88NNNNNNr-   )r   r   r    r  r,   )	r*   r   rk   r   r6   ywhboundss	      `     r+   r,   zLTFigure.__init__  s    	Aq!a&1q5!*q!a%j1q5!a%.ANNNNvNNNNN""4...r-   c           
          d| j         j        d| j        dt          | j                  dt          | j                  d	S )Nra   r   r   r   rc   )rd   r8   r   r   rk   r!   r   rR   s    r+   rS   zLTFigure.__repr__  sK     N###IIITYt{####	
 	
r-   rY   c                 N    |j         sd S t                              | |           d S r(   )rH   r  r\   r[   s     r+   r\   zLTFigure.analyze  s-    ! 	F!!$111r-   )r8   r9   r:   rU   rW   r   r   r,   rS   r>   r\   r<   r-   r+   r  r    s         S  f     
# 
 
 
 
 T      r-   r  c            	       8    e Zd ZdZd
dedededdfdZdefd	Z	dS )LTPagezRepresents an entire page.

    Like any other LTLayoutContainer, an LTPage can be iterated to obtain child
    objects like LTTextBox, LTFigure, LTImage, LTRect, LTCurve and LTLine.
    r   pageidrk   rotater&   Nc                 X    t                               | |           || _        || _        d S r(   )r  r,   r  r  )r*   r  rk   r  s       r+   r,   zLTPage.__init__  s+    ""4...r-   c           	      l    d| j         j        d| j        dt          | j                  d| j        d	S )Nra   r   r   z rotate=rc   )rd   r8   r  r   rk   r  rR   s    r+   rS   zLTPage.__repr__  sA     N###KKKTYKKK	
 	
r-   r7   )
r8   r9   r:   rU   r;   r   rM   r,   rW   rS   r<   r-   r+   r  r    sn          s $  d    
# 
 
 
 
 
 
r-   r  )Fr  loggingtypingr   r   r   r   r   r   r	   r
   r   r   r   r   pdfcolorr   pdffontr   	pdfinterpr   r   pdftypesr   utilsr   r   r   r   r   r   r   r   r   r   r    r!   r"   	getLoggerr8   loggerr$   r>   r/   r^   rj   r   r   r   r   r   r   r   r  r  r  TextLineElementr  r-  rL  r3   r_  rp  rv  r4   rx  r  r  r  r  r<   r-   r+   <module>r     s                                $ # # # # #             & & & & & &       # # # # # # # #                               " " " " " "                              		8	$	$
 
 
 
 
 
 
 
A
 A
 A
 A
 A
 A
 A
 A
H       " " " " " " " "F F F F F& F F FR"; "; "; "; ";k "; "; ";J
 
 
 
 
W 
 
 
B
 
 
 
 
W 
 
 
B
 
 
 
 
k 
 
 
4    VV    F F F F F[& F F FR ')6
*
*
*    +ww/   :    K0   (	
 	
 	
 	
 	
+G4f 	
 	
 	
 '? ? ? ? ?1 ? ? ?DA< A< A< A< A<: A< A< A<HA: A: A: A: A: A: A: A:H" " " " "
+ " " "2    )       	    M12     /"23   
 
 
 
 
k 
 
 

 
 
 
 
k 
 
 
w w w w wK0 w w wt        @
 
 
 
 
 
 
 
 
 
r-   