
    Ngb              	       V   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZ d dlmZ d d	l m!Z! d
dl"m#Z# d
dl$m%Z%m&Z&m'Z'm(Z( d
dl)m*Z* d
dl+m,Z,m-Z- d
dl.m/Z/m0Z0m1Z1m2Z2 d
dl#m3Z3m4Z4m5Z5 d
dl6m7Z7  e j8        d          Z9 e:g d          Z;erd
dl<m=Z= d
dl>m?Z? ddddddZ@deAdeBfdZCdeed f         dee
eeeDeEf         d f                  e
eB         f         fd!ZFdedee
eeeDeEf         d f                  e
eB         f         fd"ZGd#eeBef         deeBef         fd$ZH G d% d&e          ZId7d'e%d(e&de%fd)ZJd'e%d*e&de%fd+ZK G d, d-e*          ZL G d. d/eL          ZMd0e%d1e%ddfd2ZN G d3 d4eM          ZO G d5 d6eM          ZPdS )8    N)	lru_cache)
TYPE_CHECKINGAnyCallableDict	GeneratorListOptionalPatternTupleUnion)	normalize)PDFPageAggregator)LTCharLTComponentLTContainerLTCurveLTItemLTPageLTTextContainer)PDFPageInterpreter	PDFStackT)PDFPage)	PSLiteral   )utils)T_bboxT_numT_obj
T_obj_list)	Container)PDFStructTreeStructTreeMissing)T_table_settingsTableTableFinderTableSettings)decode_textresolve_allresolve_and_decode)TextMapz^LT)advheight	linewidthptssizesrcsizewidthx0x1y0y1bitsmatrixuprightfontnametext	imagemask
colorspaceevenoddfillnon_stroking_colorstrokestroking_colorstreammcidtag)	PageImage)PDFzSimSun,RegularzSimHei,RegularzSimKai,RegularzSimFang,RegularzSimLi,Regular)s   s   s   _GB2312s   _GB2312s   r:   returnc                     d| v r-|                      d          dz   }| d |         | |d          }}nd| }}t                              |t          |          dd                   }t          |          dd         |z   S )N   +r          )indexCP936_FONTNAMESgetstr)r:   split_atprefixsuffix
suffix_news        K/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfplumber/page.pyfix_fontname_bytesrW   X   s    x>>$''!+!)8),hxyy.Ah $$VS[[2->??Jv;;qtz))rK   color.c                     t          | d         t                    r&| d d         pd t          | d         j                  fS | d fS )NrM   )
isinstancer   r(   name)rX   s    rV   separate_patternr\   c   sJ     %)Y'' crc
"d[r%@%@@@d{rK   c                     | dS t          | t                    r| }n(t          | t                    rt          |           }n| f}t          |          S )N)NN)rZ   tuplelistr\   )rX   	tuplefieds     rV   normalize_colorra   l   s[     }|	E5	!	! 			E4	 	  %LL		H	I&&&rK   kwargsc                 >    d |                                  D             S )Nc                 b    i | ],\  }}|t          |t                    rt          |          n|-S  )rZ   r_   r^   ).0keyvalues      rV   
<dictcomp>z'tuplify_list_kwargs.<locals>.<dictcomp>{   sG       C 	j55@eElll5  rK   )items)rb   s    rV   tuplify_list_kwargsrk   z   s+      ,,..   rK   c                        e Zd ZU dZdZee         ed<   dZee	         ed<   dde
dee         ddfdZdd	Zdd
Zdef fdZd fdZd fdZ xZS )"PDFPageAggregatorWithMarkedContentzZExtract layout from a specific page, adding marked-content IDs to
    objects where found.Ncur_mcidcur_tagrE   propsrH   c                     t          |j                  | _        t          |t                    rd|v r|d         | _        dS d| _        dS )z5Handle beginning of tag, setting current MCID if any.MCIDN)r(   r[   ro   rZ   dictrn   )selfrE   rp   s      rV   	begin_tagz,PDFPageAggregatorWithMarkedContent.begin_tag   sG    "38,,eT"" 	!v!&MDMMM DMMMrK   c                 "    d| _         d| _        dS )z/Handle beginning of tag, clearing current MCID.N)ro   rn   rt   s    rV   end_tagz*PDFPageAggregatorWithMarkedContent.end_tag   s    rK   c                 v    | j         j        r,| j         j        d         }| j        |_        | j        |_        dS dS )z^Add current MCID to what we hope to be the most recent object created
        by pdfminer.six.rM   N)cur_item_objsrn   rD   ro   rE   )rt   cur_objs     rV   tag_cur_itemz/PDFPageAggregatorWithMarkedContent.tag_cur_item   s@     = 	'm)"-G=GL,GKKK	' 	'rK   c                 b     t                      j        |i |}|                                  |S )z;Hook for rendering characters, adding the `mcid` attribute.)superrender_charr}   )rt   argsrb   r,   	__class__s       rV   r   z.PDFPageAggregatorWithMarkedContent.render_char   s5    !egg!42622
rK   c                 b     t                      j        |i | |                                  dS )z7Hook for rendering images, adding the `mcid` attribute.N)r   render_imager}   rt   r   rb   r   s      rV   r   z/PDFPageAggregatorWithMarkedContent.render_image   s7    d-f---rK   c                 b     t                      j        |i | |                                  dS )zAHook for rendering lines and curves, adding the `mcid` attribute.N)r   
paint_pathr}   r   s      rV   r   z-PDFPageAggregatorWithMarkedContent.paint_path   s7    D+F+++rK   NrH   N)__name__
__module____qualname____doc__rn   r
   int__annotations__ro   rQ   r   r   ru   rx   r}   floatr   r   r   __classcell__r   s   @rV   rm   rm      s          #Hhsm"""!GXc]!!!! !Y !x	/B !d ! ! ! !   
' ' ' 'e           
         rK   rm   box_rawrotationc                     t          | d         | d         f          \  }}t          | d         | d         f          \  }}|dv r||||fS ||||fS )Nr   rL   r      )Z   i  )sorted)r   r   r3   r4   r5   r6   s         rV   _normalize_boxr      si    
 WQZ,--FBWQZ,--FB9BBBBrK   	mb_heightc                 (    | \  }}}}|||z
  |||z
  fS r   re   )r   r   r3   r5   r4   r6   s         rV   _invert_boxr      s&    NBB	BIN33rK   c                      e Zd ZU ej        dgz   Zee         ed<   dZe	ed<   dZ
	 dGddd	ed
edefdZdHdZedefd            Zedefd            Zedeeeef                  fd            Zedefd            Zedefd            Zedefd            Zedeeef         fd            Zdeeef         deeef         fdZdedefdZ dee!         de"eddf         fdZ#deeef         fdZ$	 dIde%e&         de'fdZ(	 dIde%e&         dee)         fdZ*	 dIde%e&         de%e)         fd Z+	 dIde%e&         deeee%e                                    fd!Z,	 dIde%e&         de%eee%e                                    fd"Z-d#ede.fd$Z/	 	 	 	 	 dJd%e0ee1e         f         d&e	d'e	d(ed)e	d*e	d#edeeeef                  fd+Z2d#edefd,Z3d#edefd-Z4d#edefd.Z5	 dKd/e	d)e	d#edefd0Z6	 dLd2e7d3e	d4e	dd5fd6Z8	 dLd2e7d3e	d4e	dd5fd7Z9	 dLd2e7d3e	d4e	dd5fd8Z:d9e;ege	f         dd:fd;Z<d#edd:fd<Z=	 	 	 	 	 dMd=e%e0ee>f                  d>e%e0ee>f                  d?e%e0ee>f                  d@e	dAe	ddBfdCZ?dIdDe%ee                  deeef         fdEZ@defdFZAdS )NPage_layoutcached_propertiesTis_originalNr   pdfrG   page_objpage_numberinitial_doctopc                 "   || _         | | _        | _        || _        || _        ddt
          dt          dt          ffd} |dd          }|dz  | _        t           |d          | j                  }|d	         |d
         z
  }t          ||          | _
        dj        v r2t          t           |d          | j                  |          | _        n| j
        | _        | j
        | _         t                      | j                  | _        d S )Nrg   defaultrH   c                 ^    t          j                            |                     }||n|S r   )r)   attrsrP   )rg   r   rh   r   s      rV   get_attrzPage.__init__.<locals>.get_attr   s.     2 23 7 788E#m776rK   Rotater   ih  MediaBoxr   r   CropBoxr   )r   	root_pager   r   r   rQ   r   r   r   r   mediaboxr   cropboxbboxr   _get_textmapget_textmap)	rt   r   r   r   r   r   	_rotationmb_rawr   s	     `      rV   __init__zPage.__init__   s0     &,	7 	7# 	7 	7s 	7 	7 	7 	7 	7 	7 HXq))	!C 4 4dmDD1Iq	)	#FI66&&&xx	22DMBBI DLL  =DL M	 '9;;t'899rK   rH   c                 `    |                                   | j                                         d S r   )flush_cacher   cache_clearrw   s    rV   closez
Page.close   s/    $$&&&&&rK   c                 8    | j         d         | j         d         z
  S )NrL   r   r   rw   s    rV   r2   z
Page.width       y|dil**rK   c                 8    | j         d         | j         d         z
  S )Nr   r   r   rw   s    rV   r-   zPage.height   r   rK   c                 f    	 d t          | j        |           D             S # t          $ r g cY S w xY w)z-Return the structure tree for a page, if any.c                 6    g | ]}|                                 S re   )to_dict)rf   elems     rV   
<listcomp>z'Page.structure_tree.<locals>.<listcomp>  s     MMMtDLLNNMMMrK   )r"   r   r#   rw   s    rV   structure_treezPage.structure_tree  sK    	MM}TXt/L/LMMMM  	 	 	III	s   ! 00c                 .   t          | d          r| j        S t          | j        j        | j        | j        j                  }t          | j        j        |          }|                    | j	                   |
                                | _        | j        S )Nr   )pagenolaparams)hasattrr   rm   r   rsrcmgrr   r   r   process_pager   
get_result)rt   deviceinterpreters      rV   layoutzPage.layout  s    4## 	 <3H#X&
 
 

 ))96BB  ///%0022|rK   c                 v    dt           t          t          f         dt          dt           t          t          f         f fddt          dt          f fd}t	           j        j                  pg }t          t          ||                    }t           t                    r                     |          S |S )NptrrH   c                 |    |dz  }t          |          D ]%}| \  }}||dz  k    rj        nj        }|||z
  f} &| S )Nr   rL   )ranger2   r-   )r   r   turnsixycomprt   s          rV   rotate_pointz!Page.annots.<locals>.rotate_point  sW    GE5\\ % %1%&%!)^^tzz$(_IrK   annotc                    | d         \  }}}} ||fj                   } ||fj                   }j        j        }t          t	          g ||R           |          \  }}	}
}|                     di           }|                    d          |                     d          |                     d          d}|                                D ]I\  }}|B	 |                    d          ||<   !# t          $ r |                    d          ||<   Y Ew xY wJj	        d	|||z
  |
||	z
  j
        |	z   |	||
|z
  ||	z
  d
}|                    |           d| v r| d<   | |d<   |S )NRectAURITContents)urititlecontentszutf-8zutf-16r   )r   object_typer3   r5   r4   r6   doctoptopbottomr2   r-   Pdata)r   r   r-   r   r   rP   rj   decodeUnicodeDecodeErrorr   r   update)r   _a_b_c_dpt0pt1rhr3   r   r4   r   aextraskvparsedr   rt   s                    rV   parsezPage.annots.<locals>.parse#  s   "6]NBB,Bx77C,Bx77C&B"-n\s\S\\.J.JB"O"OBR		#r""AuuU||3!IIj11 F
  7 71=7$%HHW$5$5q		- 7 7 7$%HHX$6$6q			7 !  $/&6k3h-3 b 3, F MM&!!! e||!c
"F6NMs   C22"DD)r   r   r   r   r)   r   annotsr_   maprZ   CroppedPage_crop_fn)rt   r   rawr   r   s   `   @rV   r   zPage.annots  s    	U5%<0 	S 	U5%<=P 	 	 	 	 	 	'	 '	5 '	 '	 '	 '	 '	 '	 '	R $-.//52c%oo&&dK(( 	==(((MrK   c                 $    d | j         D             S )Nc                 "    g | ]}|d          
|S )r   re   )rf   r   s     rV   r   z#Page.hyperlinks.<locals>.<listcomp>U  s!    ???a!E(*>*>*>*>rK   )r   rw   s    rV   
hyperlinkszPage.hyperlinksS  s    ??4;????rK   c                 p    t          | d          r| j        S |                                 | _        | j        S )N_objects)r   r  parse_objectsrw   s    rV   objectszPage.objectsW  s7    4$$ 	!= /3/A/A/C/C}rK   r   c                 j    | j         d         |d         z   | j         d         | j        z   |d         z
  fS )Nr   r   )r   r-   )rt   r   s     rV   point2coordzPage.point2coord^  s4    a 2a5($-*:T[*H2a5*PQQrK   objc           
          t          j        t          d|j        j                                                  }dt          t          t          f         dt          t          t          t          f                  fd}t          t          d t          ||j                                                                      }||d<    j        |d<   dD ]7}t!          ||          r%t#          t%          ||          j                  ||<   8dD ]'\  }}||v rt)          ||                   \  ||<   ||<   (t+          |t,          t.          f          r?|                                } j        j        t7           j        j        |          n||d	<   t+          |t,                    ru|j        }	t)          |	j                  \  |d
<   |d<   t)          |	j                  \  |d<   |d<   t+          |d         t>                    rtA          |d                   |d<   nat+          |tB          f          rKtE          t           j#        |d                             |d<    fd|j$        D             |d<   |j%        |d<    j&        d d         \  }
}d|v r? j'        |d         z
  |z   |d<    j'        |d         z
  |z   |d<    j(        |d         z   |d<   d|v r"|
dk    r|d         |
z   |d<   |d         |
z   |d<   |S )N itemrH   c                 H    | \  }}|t           v rt          |          }||fS d S r   )	ALL_ATTRSr)   )r  r   r   ress       rV   process_attrz)Page.process_object.<locals>.process_attre  s-    DAqI~~!!nn3xtrK   r   r   )ncsscs))rB   stroking_pattern)r@   non_stroking_patternr;   rB   r  r@   r  r:   r/   c                 F    g | ]^}}|gt          j        |          R S re   )r   r  )rf   cmdr/   rt   s      rV   r   z'Page.process_object.<locals>.<listcomp>  s5    bbb93S>3t'7#=#=>>bbbrK   pathdashrL   r5   r6   r   r   r   r3   r   r4   ))resublt_patr   r   lowerr   rQ   r   r
   rs   filterr   __dict__rj   r   r   r*   getattrr[   ra   rZ   r   r   get_textr   unicode_normnormalize_unicodegraphicstatescolorncolorbytesrW   r   r_   r  original_pathdashing_styler   r-   r   )rt   r  kindr  attrcs
color_attrpattern_attrr;   gsmb_x0mb_tops   `           rV   process_objectzPage.process_objectb  s.   vfb#-"899??AA	uS#X 	8E#s(O3L 	 	 	 	 F4\3<3E3E3G3G!H!HIIJJ"]".]  	E 	EB sB E-gc2.>.>.CDDR)
 	Y 	Y$J T!!7FtJGW7X7X4Z $|"4cFO455 	<<>>D 8(4 "$("7>>> L c6"" 	- !B?N	@ @<D!"D);$< HW	H HDD%&-C(D
 $z*E22 H#5d:6F#G#GZ gZ(( 	-s4#3T%[AABBDK cbbbPSPabbbDL,DL
 bqb)v4<<;d3v=DK"kDJ6&@DN!04;>DN4<<EQJJde+DJde+DJrK   layout_objectsc              #      K   |D ]r}t          |t                    rD| j        j        |                     |          V  |                     |j                  E d {V  [|                     |          V  sd S r   )rZ   r   r   r   r0  iter_layout_objectsr{   )rt   r1  r  s      rV   r3  zPage.iter_layout_objects  s       " 		/ 		/C#{++ /8$0--c2222233CI>>>>>>>>>>))#......		/ 		/rK   c                     i }|                      | j        j                  D ]D}|d         }|dv r|                    |          g ||<   ||                             |           E|S )Nr   )anno)r3  r   r{   rP   append)rt   r  r  r(  s       rV   r  zPage.parse_objects  s|    )+++DK,=>> 	& 	&C}%Dx{{4  ( "DM  %%%%rK   table_settingsc                 J    t          j        |          }t          | |          S r   )r'   resolver&   rt   r7  tsets      rV   debug_tablefinderzPage.debug_tablefinder  s$     $^444&&&rK   c                 T    t          j        |          }t          | |          j        S r   )r'   r9  r&   tablesr:  s      rV   find_tableszPage.find_tables  s'     $^444&&--rK   c                    t          j        |          }|                     |          }t          |          dk    rd S dt          dt
          t          t          t          f         fd}t          t          ||                    d         }|S )Nr   r   rH   c                 ^    t          | j                   | j        d         | j        d         fS )Nr   r   )lencellsr   r   s    rV   sorterzPage.find_table.<locals>.sorter  s%    \\M16!9afQi88rK   )rg   )
r'   r9  r?  rB  r%   r   r   r   r_   r   )rt   r7  r;  r>  rE  largests         rV   
find_tablezPage.find_table  s     $^44!!$''v;;!4	9e 	9c5%&7 8 	9 	9 	9 	9 vf&1112215rK   c                 r    t          j        |          |                               }fd|D             S )Nc                 8    g | ]} |j         d i j        pi S )re   )extracttext_settings)rf   tabler;  s     rV   r   z'Page.extract_tables.<locals>.<listcomp>  s3    PPP;;!3!9r;;PPPrK   )r'   r9  r?  )rt   r7  r>  r;  s      @rV   extract_tableszPage.extract_tables  sB     $^44!!$''PPPPPPPPrK   c                     t          j        |          }|                     |          }|d S  |j        di |j        pi S Nre   )r'   r9  rG  rJ  rK  )rt   r7  r;  rL  s       rV   extract_tablezPage.extract_table  sO     $^44%%=4 5=>>D$6$<">>>rK   rb   c                     t          | j                  }d|vr|                    d| j        i           d|vr|                    d| j        i           i ||}t          j        | j        fi |S )N)layout_bboxlayout_width_charslayout_widthlayout_height_charslayout_height)rs   r   r   r2   r-   r   chars_to_textmapchars)rt   rb   defaultsfull_kwargss       rV   r   zPage._get_textmap  s    #'	$
 $
 $
  v--OO^TZ8999 ..OO_dk:;;;&<&<V&<%dj@@K@@@rK   patternregexcase
main_groupreturn_charsreturn_groupsc                 l     | j         di t          |          }|                    ||||||          S )N)r\  r]  r^  r_  r`  re   )r   rk   search)	rt   r[  r\  r]  r^  r_  r`  rb   textmaps	            rV   rb  zPage.search  sR     #$"AA%8%@%@AA~~!%'  
 
 	
rK   c                 @     | j         di t          |          j        S rO  )r   rk   	as_stringrt   rb   s     rV   extract_textzPage.extract_text  s'    t>>"5f"="=>>HHrK   c                 0    t          j        | j        fi |S r   )r   extract_text_simplerX  rf  s     rV   ri  zPage.extract_text_simple  s    (>>v>>>rK   c                 0    t          j        | j        fi |S r   )r   extract_wordsrX  rf  s     rV   rk  zPage.extract_words  s    "4:88888rK   stripc                 `     | j         di t          |                              ||          S )N)rl  r_  re   )r   rk   extract_text_lines)rt   rl  r_  rb   s       rV   rn  zPage.extract_text_lines"  sC      t>>"5f"="=>>QQl R 
 
 	
rK   Fr   relativestrictr   c                 (    t          | |||          S )N)ro  rp  )r   rt   r   ro  rp  s       rV   cropz	Page.crop)  s     4HHHHrK   c                 >    t          | |||t          j                  S zS
        Same as .crop, except only includes objects fully within the bbox
        )ro  rp  crop_fn)r   r   within_bboxrr  s       rV   rw  zPage.within_bbox.  s)     $&%BS
 
 
 	
rK   c                 >    t          | |||t          j                  S ru  )r   r   outside_bboxrr  s       rV   ry  zPage.outside_bbox8  s)     $&%BT
 
 
 	
rK   test_functionFilteredPagec                 "    t          | |          S r   )r{  )rt   rz  s     rV   r  zPage.filterB  s    D-000rK   c                     t          | d           }d | j                                        D             |_        t	          j        | j        fi ||j        d<   |S )u   
        Removes duplicate chars — those sharing the same text and positioning
        (within `tolerance`) as other characters in the set. Adjust extra_args
        to be more/less restrictive with the properties checked.
        c                     dS )NTre   rD  s    rV   <lambda>z#Page.dedupe_chars.<locals>.<lambda>K  s     rK   c                     i | ]\  }}||	S re   re   )rf   r(  objss      rV   ri   z%Page.dedupe_chars.<locals>.<dictcomp>L  s    HHHZT4dDHHHrK   char)r{  r  rj   r  r   dedupe_charsrX  )rt   rb   ps      rV   r  zPage.dedupe_charsE  s^     ~~..HH4<3E3E3G3GHHH
"/
EEfEE
6rK   
resolutionr2   r-   	antialiasforce_mediaboxrF   c                     ddl m}m} t          d |||fD                       }|dk    rt	          d|           |d|z  | j        z  }n|d|z  | j        z  } || |p|||          S )z
        You can pass a maximum of 1 of the following:
        - resolution: The desired number pixels per inch. Defaults to 72.
        - width: The desired image width in pixels.
        - height: The desired image width in pixels.
        r   )DEFAULT_RESOLUTIONrF   c              3      K   | ]}|d uV  	d S r   re   )rf   r   s     rV   	<genexpr>z Page.to_image.<locals>.<genexpr>`  s&      KK!KKKKKKrK   zUOnly one of these arguments can be provided: resolution, width, height. You provided NH   )r  r  r  )displayr  rF   sum
ValueErrorr2   r-   )	rt   r  r2   r-   r  r  r  rF   	num_specss	            rV   to_imagezPage.to_imageP  s     	;:::::::KK
E6/JKKKKK	q==shqss   edj0JJft{2Jy!7%7)	
 
 
 	
rK   object_typesc           	         |+t          | j                                                  dgz   }n|}| j        | j        | j        | j        | j        | j        | j	        | j
        d}|D ]}t          | |dz             ||dz   <   |S )Nr   )r   r   r   r   r   r   r2   r-   s)r_   r  keysr   r   r   r   r   r   r2   r-   r  )rt   r  _object_typesdts        rV   r   zPage.to_dictq  s     !2!2!4!455	AMM(M+"1|IZk	
 	
  	0 	0A q3w//Aa#gJJrK   c                     d| j          dS )Nz<Page:>)r   rw   s    rV   __repr__zPage.__repr__  s    +(++++rK   r   r   r   )TTr   TT)TT)FT)NNNFF)Br   r   r   r!   r   r	   rQ   r   r   boolpagesr   r   r   r   r   propertyr2   r-   r   r   r   r   r   r    r   r  r  r   r  r   r   r0  r   r   r3  r  r
   r$   r&   r<  r%   r?  rG  rM  rP  r+   r   r   r   rb  rg  ri  rk  rn  r   rs  rw  ry  r   r  r  r   r  r   r  re   rK   rV   r   r      s)        #,#>)#LtCyLLLKE !"': ':': ': 	':
 ': ': ': ':R' ' ' ' +u + + + X+ + + + + X+ T#s(^ 4    X     X 7
 7 7 7 X7r @J @ @ @ X@ c:o.    XReE5L1 ReE5L6I R R R RO& OU O O O Ob/";//	5$$	%/ / / /	tCO4 	 	 	 	 <@' '&'78'	' ' ' ' <@. .&'78.	e. . . . <@ &'78	%   $ <@Q Q&'78Q	d4&'	(Q Q Q Q <@? ?&'78?	$tHSM*+	,? ? ? ?	AS 	AW 	A 	A 	A 	A !"
 
sGCL()
 
 	

 
 
 
 
 
d38n	
 
 
 
(IS IS I I I I?C ?C ? ? ? ?9c 9j 9 9 9 9 8<
 

04
GJ
	
 
 
 
 DHI II&*I<@I	I I I I DH
 

&*
<@
	
 
 
 
 DH
 

&*
<@
	
 
 
 
1HeWd]$; 1 1 1 1 1	S 	^ 	 	 	 	 37-1.2$
 
U3:./
 c5j)*
 sEz*+	

 
 
 

 
 
 
B HT#Y$7 4S>    &,# , , , , , ,rK   r   c                   *    e Zd ZU dZeed<   defdZdS )DerivedPageFr   parent_pagec                 T   || _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        | 	                    t          j                    t                      | j                  | _        d S r   )r  r   r   r   r   r   r   r   r   r   r!   r   r   r   r   )rt   r  s     rV   r   zDerivedPage.__init__  s    &$.?#,&2)8#,#,"*4555&9;;t'899rK   N)r   r   r   r   r  r   r   r   re   rK   rV   r  r    sA         K:D : : : : : :rK   r  r   parent_bboxc                    t          j        |           }|dk    rt          d|  d          t          j        | |          }|t          d|  d|           t          j        |          }||k     rt          d|  d|           d S )Nr   zBounding box z has an area of zero.z. is entirely outside parent page bounding box z. is not fully within parent page bounding box )r   calculate_arear  get_bbox_overlap)r   r  	bbox_areaoverlapoverlap_areas        rV   test_proposed_bboxr    s    $T**IA~~DDDDEEE$T;77G6D 6 6(36 6
 
 	

 '00Li6D 6 6(36 6
 
 	
  rK   c                        e Zd Zej        ddfdededeeegef         de	de	f
 fdZ
ed	eeef         fd
            Z xZS )r   FTr  	crop_bboxrv  ro  rp  c                 H   |r%|j         \  }}}}\  }	}
}}|	|z   |
|z   ||z   ||z   f|rt          |j                    dt          dt          ffd}t                                          |           || _        t          j        u r|j         | _         d S | _         d S )Nr  rH   c                      |           S r   re   )r  r  rv  s    rV   r   z&CroppedPage.__init__.<locals>._crop_fn  s    74+++rK   )r   r  r    r   r   r   r   ry  )rt   r  r  rv  ro  rp  o_x0o_top_r3   r   r4   r   r   r   s     ``          rV   r   zCroppedPage.__init__  s      	L + 0D%A"+BRdC%KdFUNKI 	<y+*:;;;	,: 	,* 	, 	, 	, 	, 	, 	, 	, 	%%%  e(((#(DIII!DIIIrK   rH   c                      t           d          r j        S  fd j        j                                        D              _         j        S )Nr  c                 B    i | ]\  }}|                     |          S re   )r   rf   r   r   rt   s      rV   ri   z'CroppedPage.objects.<locals>.<dictcomp>  s8     0
 0
 0
$(AqAt}}Q0
 0
 0
rK   r   r  r  r  rj   rw   s   `rV   r  zCroppedPage.objects  sc    4$$ 	!= 0
 0
 0
 0
,0,<,D,J,J,L,L0
 0
 0
 }rK   )r   r   r   r   crop_to_bboxr   r   r   r    r  r   r  r   rQ   r  r   r   s   @rV   r   r     s        
 ?D>P" "" " :v.
:;	"
 " " " " " " ": c:o.    X    rK   r   c                   f     e Zd Zdedeegef         f fdZede	e
ef         fd            Z xZS )r{  r  	filter_fnc                 p    |j         | _         || _        t                                          |           d S r   )r   r  r   r   )rt   r  r  r   s      rV   r   zFilteredPage.__init__  s2    $	"%%%%%rK   rH   c                      t           d          r j        S  fd j        j                                        D              _         j        S )Nr  c           	      \    i | ](\  }}|t          t          j        |                    )S re   )r_   r  r  r  s      rV   ri   z(FilteredPage.objects.<locals>.<dictcomp>  sC     0
 0
 0
1 tF4>1--..0
 0
 0
rK   r  rw   s   `rV   r  zFilteredPage.objects  sc    4$$ 	!= 0
 0
 0
 0
(066880
 0
 0
 }rK   )r   r   r   r   r   r   r  r   r  r   rQ   r    r  r   r   s   @rV   r{  r{    s        &D &Xugtm5L & & & & & &
 c:o.    X    rK   r{  r  )Qr  	functoolsr   typingr   r   r   r   r   r	   r
   r   r   r   unicodedatar   r!  pdfminer.converterr   pdfminer.layoutr   r   r   r   r   r   r   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.psparserr   r
  r   _typingr   r   r   r    	containerr!   	structurer"   r#   rL  r$   r%   r&   r'   r(   r)   r*   
utils.textr+   compiler  setr  r  rF   r   rG   rO   r%  rQ   rW   r   r   r\   ra   rk   rm   r   r   r   r  r  r   r{  re   rK   rV   <module>r     s   				                              7 6 6 6 6 6 0 0 0 0 0 0                  = < < < < < < < $ $ $ $ $ $ ' ' ' ' ' '       5 5 5 5 5 5 5 5 5 5 5 5             7 7 7 7 7 7 7 7 F F F F F F F F F F F F ? ? ? ? ? ? ? ? ? ?      	F		C   	@  """"""
 *) 0 1( * *3 * * * *c?
8E%s
+S012HSMAB   ''
8E%s
+S012HSMAB' ' ' 'S#X 4S>    0 0 0 0 0): 0 0 0f
  
 F 
 e 
 F 
  
  
  
  4 4E 4f 4 4 4 4
|, |, |, |, |,9 |, |, |,~: : : : :$ : : :"
V 
& 
T 
 
 
 
(% % % % %+ % % %P    ;     rK   