
    Ng܋                     $   d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddl)m*Z* ddl+m,Z, ddl+m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddlm5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddlm;Z; dd lm<Z< dd!lm=Z= dd"lm>Z>  ej?        e@          ZA G d# d$e*          ZB G d% d&eB          ZC ed'e
ee5          ZD G d( d)eBeeD                   ZE G d* d+eEe5                   ZF G d, d-eEe5                   ZG G d. d/eEe5                   ZH G d0 d1eEe5                   ZIdS )2    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)PDFColorSpace   )utils)ImageWriter)LAParamsLTComponentTextGroupElement)LTAnno)LTChar)LTContainer)LTCurve)LTFigure)LTImageLTItem)LTLayoutContainer)LTLine)LTPage)LTRect)LTText)	LTTextBox)LTTextBoxVertical)LTTextGroup)
LTTextLine)PDFTextDevice)PDFFont)PDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOPointMatrixRectPathSegmentmake_compat_str)apply_matrix_pt)bbox2str)enc)mult_matrixc                   *   e Zd ZU eed<   eed<   	 	 d&dededee	         ddfd	Z
d
ededdfdZd
eddfdZdedededdfdZdeddfdZdededdfdZdededededee         ddfdZdedededededed ed!edefd"Zdededefd#Zd$eddfd%ZdS )'PDFLayoutAnalyzercur_itemctmr   Nrsrcmgrpagenolaparamsreturnc                 Z    t          j        | |           || _        || _        g | _        d S N)r&   __init__r<   r=   _stackselfr;   r<   r=   s       N/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfminer/converter.pyrA   zPDFLayoutAnalyzer.__init__9   s0     	tW--- /1    pagec                     |j         \  }}}}t          |||f          \  }}t          |||f          \  }}ddt          ||z
            t          ||z
            f}t          | j        |          | _        d S )Nr   )mediaboxr3   absr   r<   r9   )rD   rG   r:   x0y0x1y1rI   s           rE   
begin_pagezPDFLayoutAnalyzer.begin_pageD   sy    =RR"3R11R"3R11Rq#b2g,,BG5t{H55rF   c                    | j         r)J t          t          | j                                         t          | j        t
                    s)J t          t          | j                                        | j        | j                            | j                   | xj	        dz  c_	        | 
                    | j                   d S )Nr   )rB   strlen
isinstancer9   r   typer=   analyzer<   receive_layout)rD   rG   s     rE   end_pagezPDFLayoutAnalyzer.end_pageK   s    ;55C$4$4 5 5555$-00JJ#d4=6I6I2J2JJJJ=$M!!$-000qDM*****rF   namebboxmatrixc                     | j                             | j                   t          ||t	          || j                            | _        d S r@   )rB   appendr9   r   r6   r:   )rD   rX   rY   rZ   s       rE   begin_figurezPDFLayoutAnalyzer.begin_figureS   s=    4=))) t[-J-JKKrF   _c                 
   | j         }t          | j         t                    s)J t          t	          | j                                         | j                                        | _         | j                             |           d S r@   )r9   rS   r   rQ   rT   rB   popadd)rD   r^   figs      rE   
end_figurezPDFLayoutAnalyzer.end_figureW   sj    m$-22LLCT]8K8K4L4LLLL))#rF   streamc                 :   t          | j        t                    s)J t          t	          | j                                        t          ||| j        j        | j        j        | j        j        | j        j	        f          }| j        
                    |           d S r@   )rS   r9   r   rQ   rT   r   rK   rL   rM   rN   ra   )rD   rX   rd   items       rE   render_imagezPDFLayoutAnalyzer.render_image]   s    $-22LLCT]8K8K4L4LLLL]t}/1A4=CST
 

 	$rF   gstatestrokefillevenoddpathc                 6    d                     d D                       }|dd         dk    rdS |                    d          dk    rct          j        d|          D ]K}|                    d          |                    d                   }                     |||||           LdS fdD             }	 fd	|	D             }
d
 D             } fdD             }d t          ||          D             }|dv rUt          |j	        |
d         |
d         ||||j
        |j        ||j        
  
        } j                            |           dS |dv r|
\  \  }}\  }}\  }}\  }}}|
d         |
d         k    }||k    o||k    o||k    o||k    p||k    o||k    o||k    o||k    }|rZ|rXt          |j	        g |
d         |
d         R ||||j
        |j        ||j        	  	        } j                            |           dS t!          |j	        |
||||j
        |j        ||j        	  	        } j                            |           dS t!          |j	        |
||||j
        |j        ||j        	  	        } j                            |           dS )z@Paint paths described in section 4.4 of the PDF reference manual c              3   &   K   | ]}|d          V  dS )r   N ).0xs     rE   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>o   s&      ++!++++++rF   Nr   mzm[^m]+r   c                     g | ];}t          t          |d          dk    r
|dd         nd          dd                   <S )r   hN)r   r.   )rq   prl   s     rE   
<listcomp>z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   sT       IJUadckkAbccFFtAwrss|DD  rF   c                 :    g | ]}t          j        |          S rp   )r3   r:   )rq   ptrD   s     rE   ry   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s%    CCCR?48R00CCCrF   c                 8    g | ]}t          |d                    S )r   )rQ   )rq   	operations     rE   ry   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s$    AAAyYq\**AAArF   c           
      l    g | ]0}fd t          |ddd         |ddd                   D             1S )c           	      x    g | ]6\  }}t          j        t          |          t          |          f          7S rp   )r3   r:   float)rq   operand1operand2rD   s      rE   ry   z;PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>.<listcomp>   sJ       *( $DHuXh.PQQ  rF   r   N   )zip)rq   r}   rD   s     rE   ry   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   sk     " " "
 	   .1)ADqD/9QTPQT?.S.S  " " "rF   c                 F    g | ]\  }}t          t          |g|R           S rp   )r   r1   )rq   orx   s      rE   ry   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s>          Aq [1'q''**     rF   >   mlmlh)original_pathdashing_style>   mlllhmllll   r   )joincountrefinditerstartend
paint_pathr   r   	linewidthscolorncolordashr9   ra   r    r   )rD   rh   ri   rj   rk   rl   shapert   subpathraw_ptspts	operatorstransformed_pointstransformed_pathlinerK   rL   rM   rN   x2y2x3y3r^   is_closed_loophas_square_coordinatesrectcurves   `    `                      rE   r   zPDFLayoutAnalyzer.paint_pathf   s    ++d+++++!9 D[[!![E22 H HqwwqzzAEE!HH45gwGGGGH H   NR  G DCCC7CCCCAADAAAI" " " "
 "&" " "   	+=>>     
 %%
 $FFMM"2"(+   !!$''''',,,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' " -&< -!(*#a&*3q6**(
 
D M%%d+++++#((
 
E M%%e,,,,,$MM$K
 
 !!%(((((rF   fontfontsizescalingrisecidncsgraphicstatec	                    	 |                     |          }	t          |	t                    s$J t          t          |	                                n&# t          $ r |                     ||          }	Y nw xY w|                    |          }
|                    |          }t          ||||||	|
|||
  
        }| j	        
                    |           |j        S r@   )	to_unichrrS   rQ   rT   r(   handle_undefined_char
char_width	char_dispr   r9   ra   adv)rD   rZ   r   r   r   r   r   r   r   text	textwidthtextdisprf   s                rE   render_charzPDFLayoutAnalyzer.render_char   s    	9>>#&&DdC((99#d4jj//9999# 	9 	9 	9--dC88DDD	9OOC((	>>#&&
 
 	$xs   AA  A43A4c                 D    t                               d||           d|z  S )Nzundefined: %r, %rz(cid:%d))logdebug)rD   r   r   s      rE   r   z'PDFLayoutAnalyzer.handle_undefined_char   s$    		%tS111CrF   ltpagec                     d S r@   rp   rD   r   s     rE   rV   z PDFLayoutAnalyzer.receive_layout  s    rF   r   N) __name__
__module____qualname__r   __annotations__r/   r*   intr   r   rA   r+   rO   rW   rQ   r0   r]   rc   r,   rg   r)   boolr   r1   r   r'   r   r   r   r   r   rV   rp   rF   rE   r8   r8   5   s[        	KKK
 '+		2 	2#	2 	2 8$		2
 
	2 	2 	2 	26w 6V 6 6 6 6 6+W + + + + +L LD L& LT L L L LC D       i  D        t)t) t) 	t)
 t) {#t) 
t) t) t) t)l  	
     & 
   B '            V       rF   r8   c            	       T    e Zd Z	 	 ddededee         ddfdZdeddfd	Z	defd
Z
dS )PDFPageAggregatorr   Nr;   r<   r=   r>   c                 P    t                               | |||           d | _        d S N)r<   r=   )r8   rA   resultrC   s       rE   rA   zPDFPageAggregator.__init__  s,     	""4("SSS(,rF   r   c                     || _         d S r@   r   r   s     rE   rV   z PDFPageAggregator.receive_layout  s    rF   c                 "    | j         J | j         S r@   r   rD   s    rE   
get_resultzPDFPageAggregator.get_result  s    {&&&{rF   r   )r   r   r   r*   r   r   r   rA   r   rV   r   rp   rF   rE   r   r     s         '+	- -#- - 8$	-
 
- - - -V     F      rF   r   IOTypec                   b    e Zd Z	 	 	 ddededededee         d	dfd
Z	e
ded	efd            ZdS )PDFConverterutf-8r   Nr;   outfpcodecr<   r=   r>   c                     t                               | |||           || _        || _        |                     | j                  | _        d S r   )r8   rA   r   r   _is_binary_streamoutfp_binary)rD   r;   r   r   r<   r=   s         rE   rA   zPDFConverter.__init__  sM     	""4("SSS"

 224:>>rF   c                     dt          | dd          v rdS t          | d          rdS t          | t          j                  rdS t          | t          j                  rdS t          | t          j                  rdS dS )z"Test if an stream is binary or notbmodern   TF)getattrhasattrrS   ioBytesIOStringIO
TextIOBase)r   s    rE   r   zPDFConverter._is_binary_stream)  s     '%,,,,4UF## 	5rz** 	4r{++ 	5r}-- 	5trF   )r   r   N)r   r   r   r*   r   rQ   r   r   r   rA   staticmethodr-   r   r   rp   rF   rE   r   r     s        
 '+? ?#? ? 	?
 ? 8$? 
? ? ? ?  4    \  rF   r   c                        e Zd Z	 	 	 	 	 ddedededed	ee         d
e	dee
         ddf fdZdeddfdZdeddfdZdededdfdZdede	de	de	dee         ddfdZ xZS )TextConverterr   r   NFr;   r   r   r<   r=   
showpagenoimagewriterr>   c                 p    t                                          |||||           || _        || _        d S )Nr   r<   r=   )superrA   r   r   )	rD   r;   r   r   r<   r=   r   r   	__class__s	           rE   rA   zTextConverter.__init__<  s=     	%uVhWWW$&rF   r   c                 &   t          j        || j        d          }| j        rAt	          t
          | j                                      |                                           d S t	          t          | j                                      |           d S )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rD   r   s     rE   
write_textzTextConverter.write_textJ  sw    -dDJII 	14:&&,,T[[]];;;;;$$**400000rF   r   c                      dt           dd f fd j        r                     d|j        z              |                                d           d S )Nrf   r>   c                    t          | t                    r| D ]} |           n<t          | t                    r'                    |                                            t          | t
                    r                    d           d S t          | t                    r#j        j                            |            d S d S d S )N
)	rS   r   r!   r   get_textr"   r   r   export_image)rf   childrenderrD   s     rE   r   z,TextConverter.receive_layout.<locals>.renderR  s    $,, 1! " "EF5MMMM"D&)) 1000$	** 8%%%%%D'** 8#/$11$777778 8//rF   zPage %s
)r   r   r   pageidrD   r   r   s   ` @rE   rV   zTextConverter.receive_layoutQ  s    
	8 
	8D 
	8 
	8 
	8 
	8 
	8 
	8 
	8 ? 	9OOK&-7888vrF   rX   rd   c                 P    | j         d S t                              | ||           d S r@   )r   r   rg   )rD   rX   rd   s      rE   rg   zTextConverter.render_imagef  s-    #F!!$f555rF   rh   ri   rj   rk   rl   c                     d S r@   rp   )rD   rh   ri   rj   rk   rl   s         rE   r   zTextConverter.paint_pathl  s	     	rF   )r   r   NFN)r   r   r   r*   r-   rQ   r   r   r   r   r   rA   r   r   rV   r,   rg   r)   r   r1   r   __classcell__)r   s   @rE   r   r   ;  sk       
 '+ -1' '#' ' 	'
 ' 8$' ' k*' 
' ' ' ' ' '1s 1t 1 1 1 1V     * i D      	
  {# 
       rF   r   c                       e Zd ZdddddddZddd	Z	 	 	 	 	 	 	 	 	 	 	 	 d>dedededede	e
         dededededede	e         dede	eeef                  de	eeef                  d dfd!Zd"ed dfd#Zd?d$Zd?d%Zd"ed dfd&Zd'ed(ed)ed*ed+ed,ed dfd-Zd'ed(ed.ed dfd/Zd.ed(ed)ed*ed+ed,ed dfd0Zd'ed"ed)ed*ed1ed dfd2Z	 d@d'ed(ed)ed*ed+ed,ed4ed dfd5Zd'ed dfd6Zd"ed7ed8ed dfd9Zd?d:Zd;ed dfd<Zd?d=Z dS )AHTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rG   blue)r
  charr   r   N      ?normalT2   r   r;   r   r   r<   r=   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr>   c                    t                               | |||||           | j        | j         k    rt	          d          |ddi}|ddd}|| _        || _        || _        |	| _        |
| _	        || _
        || _        || _        |r>| j                            | j                   | j                            | j                   | j	        | _        d | _        g | _        |                                  d S )Nr   )Codec is required for a binary I/O outputr  r  r  )r   rG   )r   rA   r   r   
ValueErrorr  r  r  r   r  r   r  r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rD   r;   r   r   r<   r=   r  r  r  r   r  r   r   r  r  s                  rE   rA   zHTMLConverter.__init__  s   " 	'5fx 	 	
 	
 	

 TZ00HIII!7+K$+V<<K
"$$$&&& 	6##D$4555##D$4555#26
=?rF   r   c                     | j         rFt          t          | j                                      |                    | j                              n-t          t          | j                                      |           d S r@   r   r   r   r   r   r   r	   r   s     rE   r   zHTMLConverter.write  b    : 	14:&&,,T[[-D-DEEEE$$**4000rF   c                     |                      d           | j        rd| j        z  }nd}|                      |           |                      d           d S )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rD   ss     rE   r   zHTMLConverter.write_header  se    

#$$$: 	I!#':. A
 IA

1

$%%%rF   c                     d t          d| j                  D             }dd                    |          z  }|                     |           |                     d           d S )Nc                 :    g | ]}d                      ||          S )z<a href="#{}">{}</a>)format)rq   is     rE   ry   z.HTMLConverter.write_footer.<locals>.<listcomp>  s6     
 
 
45"))!Q//
 
 
rF   r   z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger<   r   r   )rD   
page_linksr%  s      rE   write_footerzHTMLConverter.write_footer  sw    
 
9>q$+9N9N
 
 

 H$))K
 K
 
 	

1

%&&&rF   c                 J    |                      t          |                     d S r@   )r   r5   r   s     rE   r   zHTMLConverter.write_text  s    

3t99rF   colorborderwidthrr   ywrv   c                     | j                             |          }|Hd|||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r  getr  r  r   )	rD   r.  r/  rr   r0  r1  rv   color2r%  s	            rE   
place_rectzHTMLConverter.place_rect  s     !%%e,,K 
N]Q&$*4
N
N	  JJqMMMrF   rf   c                 b    |                      |||j        |j        |j        |j                   d S r@   )r5  rK   rN   widthheight)rD   r.  r/  rf   s       rE   place_borderzHTMLConverter.place_border  s+    {DGTWdj$+VVVrF   c                     | j         o| j                             |          }dt          |          ||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r5   r  r  r   )	rD   rf   r/  rr   r0  r1  rv   rX   r%  s	            rE   place_imagezHTMLConverter.place_image  s     '#0066DD II
N]Q&$*4
N
N	  JJqMMMrF   sizec                    | j                             |          }|pd||| j        z  | j        |z
  | j        z  || j        z  | j        z  fz  }|                     |           |                     |           |                     d           d S )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r3  r  r  r  r   r   )rD   r.  r   rr   r0  r<  r4  r%  s           rE   
place_textzHTMLConverter.place_text  s     !%%e,,. 
N]Q&$*44:%6	  JJqMMMOOD!!!JJ{###rF   Falsewriting_modec           	          | j                             | j                   d | _        d||||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r  r\   r  r  r  r   )	rD   r.  r/  rr   r0  r1  rv   rA  r%  s	            rE   	begin_divzHTMLConverter.begin_div  s     	tz***
 DJ"dj0DJDJ 	
 	

1rF   c                     | j         |                     d           | j                                        | _         |                     d           d S )N</span>z</div>)r  r   r  r`   )rD   r.  s     rE   end_divzHTMLConverter.end_div5  sH    :!JJy!!!_((**


8rF   fontnamer   c                    ||f}|| j         k    rh| j         |                     d           |                    d          d         }|                     d||| j        z  | j        z  fz             || _         |                     |           d S )NrE  +z.<span style="font-family: %s; font-size:%dpx">)r  r   splitr  r  r   )rD   r   rG  r   r   fontname_without_subset_tags         rE   put_textzHTMLConverter.put_text<  s    (#4:z%

9%%%*2..*=*=b*A'JJ@.4:0E0VWX   DJrF   c                 0    |                      d           d S )Nz<br>r   r   s    rE   put_newlinezHTMLConverter.put_newlineK  s    

6rF   r   c                      dt           t          t          f         dd f fddt          dd f fd |            xj         j        z  c_        d S )Nrf   r>   c                     t          | t                    r'                    dd|            | D ]} |           d S )Nr  r   )rS   r$   r9  rf   r   rD   
show_groups     rE   rT  z0HTMLConverter.receive_layout.<locals>.show_groupP  sT    $,, &!!+q$777! & &EJu%%%%FrF   c           
      j   t          | t                    rxj        | j        z  c_                            dd|            j        r`                    dj        | j        z
  j        z  z                                 d                    | j	        | j	                             | D ]} |           | j
        | j
        D ]} |           n[t          | t                    r                    dd|            n-t          | t                    rU                    dd| j        | j        | j        | j                   | D ]} |                               d           nt          | t$                    r0                    | d| j        | j        | j        | j                   n~j        dk    rt          | t*                    r)                    dd|            | D ]} |           n4t          | t,                    ra                    d	d|                                d	t1          | j        dz             | j        | j        d
           | D ]} |           nt          | t4                    rQ                    dd|                                d|                                 | j        | j        | j                   nVt          | t*                    r1| D ]} |           j        dk    r                                 nt          | t,                    rg                    d	d| j        | j        | j        | j        |                                            | D ]} |                               d	           nt          | t4                    rCt?          | j                   }!                    |                                 || j                   n<t          | tD                    r'#                    |                                            d S )NrG   r   z*<div style="position:absolute; top:%dpx;">z<a name="{}">Page {}</a></div>
r   r  exactr	  r
     r  loose)$rS   r   r  rN   r9  r   r   r  r(  r   groupsr   r   rC  rK   r7  r8  rF  r   r;  r  r%   r"   r?  rQ   indexr   r   r<  rP  get_writing_moder2   rG  rM  r!   r   )rf   r   grouprG  r   rD   rT  s       rE   r   z,HTMLConverter.receive_layout.<locals>.renderW  sQ   $'' D9(!!&!T222? 	JJD MDG3tzAC   JJ:AA K   
 " " "EF5MMMM;*!% * *"
5))))D'** 29!!'1d3333D(++ 09xDGTWdj$+VVV! " "EF5MMMMX&&&&D'** +9  q$'47DJTTTT?g--!$
33 ))*a>>>%) * *E"F5MMMM*#D)44 )))Q===%s4:>':':DGTWb   &* * *E"F5MMMM*#D&11 ))&!T:::"DMMOOTWdgty   "$
33 9%) * *E"F5MMMM?g55 ,,...#D)44 9% G G J K 1133   &* * *E"F5MMMMY////#D&11 9#24=#A#AdmmooxKKKK#D&11 9888FrF   )r   r$   r   r   r  r  rD   r   r   rT  s   ` @@rE   rV   zHTMLConverter.receive_layoutO  s    	U;0@#@A 	d 	 	 	 	 	 	 	G	 G	D G	 G	 G	 G	 G	 G	 G	 G	R 	v(rF   c                 .    |                                   d S r@   r,  r   s    rE   closezHTMLConverter.close      rF   )r   r   Nr   r  r  Tr  Nr   NNr>   N)r@  )!r   r   r   r  r  r*   r-   rQ   r   r   r   r   r   r   r   rA   r   r   r,  r   r5  r   r9  r   r;  r?  rC  rF  rM  rP  r   rV   r`  rp   rF   rE   r  r  w  s        K  K '+"-10404- -#- - 	-
 - 8$- - - - - - k*- - d38n-- d38n--  
!- - - -^# $       	 	 	 	s t    '*/49>CHMR	   (# C { t    *-27<AFKPU	   ( #(-27?D	   8 $   	
     
   :S T    S C 5 T       SV S S S S Sj     rF   r  c                       e Zd Z ej        d          Z	 	 	 	 	 ddededed	e	d
e
e         de
e         deddfdZdeddfdZddZddZdeddfdZdeddfdZddZdS )XMLConverterz[ ---]r   r   NFr;   r   r   r<   r=   r   stripcontrolr>   c                     t                               | |||||           | j        | j         k    rt	          d          || _        || _        |                                  d S )Nr   r  )r   rA   r   r   r  r   re  r   )rD   r;   r   r   r<   r=   r   re  s           rE   rA   zXMLConverter.__init__  sx     	'5fx 	 	
 	
 	

 TZ00HIII&(rF   r   c                     | j         rFt          t          | j                                      |                    | j                              n-t          t          | j                                      |           d S r@   r"  r   s     rE   r   zXMLConverter.write  r#  rF   c                     | j         r|                     d| j         z             n|                     d           |                     d           d S )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rE   r   zXMLConverter.write_header  sS    : 	3JJ?$*LMMMMJJ1222

;rF   c                 0    |                      d           d S )Nz	</pages>
rO  r   s    rE   r,  zXMLConverter.write_footer  s    

<   rF   c                     | j         r| j                            d|          }|                     t	          |                     d S Nrn   )re  CONTROLsubr   r5   r   s     rE   r   zXMLConverter.write_text  sA     	.<##B--D

3t99rF   r   c                 d     dt           dd f fddt           dd f fd |           d S )Nrf   r>   c                 ^   t          | t                    r2                    d| j        t	          | j                  fz             ndt          | t                    rO                    dt	          | j                  z             | D ]} |                               d           d S )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rS   r"   r   rZ  r4   rY   r$   rS  s     rE   rT  z/XMLConverter.receive_layout.<locals>.show_group  s    $	** 	-

5z8DI#6#678    D+.. -

4x	7J7JJKKK! & &EJu%%%%

+,,,FrF   c                 J	   t          | t                    rd| j        t          | j                  | j        fz  }                    |           | D ]} |           | j        ?                    d           | j        D ]} |                               d                               d           nt          | t                    r5d| j	        t          | j                  fz  }                    |           nt          | t                    r5d| j	        t          | j                  fz  }                    |           nRt          | t                    rHd| j	        t          | j                  |                                 fz  }                    |           nt          | t                    r\d| j        d	t          | j                  d
}                    |           | D ]} |                               d           nt          | t                    rQ                    dt          | j                  z             | D ]} |                               d           nt          | t                     rtd}t          | t"                    rd}d| j        t          | j                  |fz  }                    |           | D ]} |                               d           nt          | t&                    rdt)          | j                  t          | j                  | j        j        | j        j        | j        fz  }                    |                               |                                                                d           nt          | t8                    r+                    d|                                 z             nt          | t:                    ryj        Mj                            |           }                    dt)          |          | j         | j!        fz             n>                    d| j         | j!        fz             nJ tE          d| f                      d S )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
rn   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
F	Unhandled)#rS   r   r   r4   rY   rotater   rY  r   r   r    r   get_ptsr   rX   r%   r"   r#   rZ  r   r5   rG  r   r   r   r<  r   r   r!   r   r   r   r7  r8  rQ   )	rf   r%  r   r\  wmoderX   r   rD   rT  s	         rE   r   z+XMLConverter.receive_layout.<locals>.render  s   $'' W7<KTY''K@ 
 

1! " "EF5MMMM;*JJ|,,,!% * *"
5))))JJ}---

;''''D&)) H79NTY''=  

1D&)) B79NTY''=  

1D'** <7BNTY''LLNNF 
 

1D(++ 57 578<			8DICVCVCVCVW

1! " "EF5MMMM

=))))D*-- /7

3hty6I6IIJJJ! " "EF5MMMM

?++++D),, *7d$566 0/E5JTY''9 
 

1! " "EF5MMMM

>****D&)) 70 DM** ++)0	  

1000

;''''D&)) 7

.@AAAAD'** 7#/+88>>DJJEt99dj$+>?   
 JJ<
DK?XX    7c;"566666FrF   r   r]  s   ` @@rE   rV   zXMLConverter.receive_layout  s    	V 	 	 	 	 	 	 	 	Z	 Z	D Z	 Z	 Z	 Z	 Z	 Z	 Z	 Z	x 	vrF   c                 .    |                                   d S r@   r_  r   s    rE   r`  zXMLConverter.closeJ  ra  rF   )r   r   NNFrb  )r   r   r   r   compilerm  r*   r-   rQ   r   r   r   r   r   rA   r   r   r,  r   r   rV   r`  rp   rF   rE   rd  rd    sQ       bj899G '+-1" #  	
  8$ k*  
   .# $          s t    kV k k k k kZ     rF   rd  c                       e Zd ZdZ ej        d          Z	 	 	 	 ddeded	e	d
e
dee         defdZdede	fdZde	ddfdZddZddZde	ddfdZddZdeddfdZddZdS )HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]utf8r   NFr;   r   r   r<   r=   re  c                     t                               | |||||           || _        d| _        |                                  d S )Nr   F)r   rA   re  within_charsr   )rD   r;   r   r   r<   r=   re  s          rE   rA   zHOCRConverter.__init__b  sX     	'5fx 	 	
 	
 	
 )!rF   rY   r>   c                     |\  }}}}t          |          }t          | j        d         |z
            }t          |          }t          | j        d         |z
            }	d| d| d| d|	 S )N   zbbox  )r   	page_bbox)
rD   rY   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rE   	bbox_reprzHOCRConverter.bbox_reprr  s~    '+$ueUT^A&.//UT^A&.//:v::::::&:::rF   r   c                     | j         rI|                    | j                   }t          t          | j                                      |           d S t          t          | j                                      |           d S r@   )r   r   r   r   r   r   r	   )rD   r   encoded_texts      rE   r   zHOCRConverter.write{  sk    : 	1;;tz22L4:&&,,\:::::$$**400000rF   c                    | j         r|                     d| j         z             n|                     d           |                     d           |                     d           |                     d           |                     d           |                     d           |                     d           |                     d	           d S )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
ri  r   s    rE   r   zHOCRConverter.write_header  s    : 		JJ:<@JG   
 JJ-   	

:

&'''

W	
 	
 	
 	

S	
 	
 	
 	

C	
 	
 	
 	

;

:rF   c                 Z    |                      d           |                      d           d S )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
rO  r   s    rE   r,  zHOCRConverter.write_footer  s8    

FGGG

V	
 	
 	
 	
 	
rF   c                 t    | j         r| j                            d|          }|                     |           d S rl  )re  rm  rn  r   r   s     rE   r   zHOCRConverter.write_text  s<     	.<##B--D

4rF   c                 >   t          | j                  dk    r}d}d| j        v rd}d| j        v r|dz  }|                     d| j        | j        ||                     | j                  | j        | j        | j                                        fz             d| _        d S )	Nr   rn   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rR   working_textworking_fontr   working_sizer  working_bboxstripr|  )rD   bold_and_italic_styless     rE   
write_wordzHOCRConverter.write_word  s    t !!A%%%'"4,,,)?&***&*??&JJ(
 )).t'899)))//11	    "rF   r   c                 @     dt           dd f fd |           d S )Nrf   r>   c                 t   j         r)t          | t                    r                                 t          | t                    rl| j        _                            d| j        d	                    | j                  d           | D ]} |                               d           d S t          | t                    rW                    d	                    | j                  z             | D ]} |                               d           d S t          | t                    r^                    d| j        	                    | j                  fz             | D ]} |                               d           d S t          | t                    rj         sFd_         |                                 _        | j        _        | j        _        | j        _        d S t+          |                                                                           d	k    r=                                                     |                                            d S j        d
         | j        d
         k    s j        | j        k    sj        | j        k    r8                                 | j        _        | j        _        | j        _        xj        |                                 z  c_        j        d	         j        d
         | j        d         j        d         f_        d S d S )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>r>  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r   r   r~  )r|  rS   r   r  r   rY   r  r   r   r  r%   r"   rZ  r   r   r  r  rG  r  r<  r  rR   r  )rf   r   
child_liner   rD   s      rE   r   z,HOCRConverter.receive_layout.<locals>.render  s1     "Zf%=%= "!!!$'' 3!%


{{{DNN49$=$=$=$=?   " " "EF5MMMM

:&&&&&D*-- *

8T^^DI=V=VX   #' ' 'JF:&&&&

;'''''D),, #

Bz4>>$)#<#<=>   " " "EF5MMMM

:&&&&&D&)) ( (,D%(,D%(,	D%(,D%(,	D%%%4==??002233q88)))

4==??33333 !-a0DIaL@@#0DMAA#0DI== OO---04	D-04D-04	D-))T]]__<)) -a0 -a0 IaL -a0	-)))- rF   r   r   s   ` @rE   rV   zHOCRConverter.receive_layout  sI    6	 6	D 6	 6	 6	 6	 6	 6	 6	p 	vrF   c                 .    |                                   d S r@   r_  r   s    rE   r`  zHOCRConverter.close  s    rF   )rz  r   NFrb  )r   r   r   __doc__r   rw  rm  r*   r-   rQ   r   r   r   r   rA   r0   r  r   r   r,  r   r  r   rV   r`  rp   rF   rE   ry  ry  O  sm       UU  bj9::G '+" #  	
  8$     ;d ;s ; ; ; ;1# 1$ 1 1 1 1   4
 
 
 
s t    
" " " "29V 9 9 9 9 9v     rF   ry  )Jr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfcolorr   rn   r   imager   layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   	pdfdevicer&   pdffontr'   r(   	pdfinterpr)   r*   pdfpager+   pdftypesr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   	getLoggerr   r   r8   r   r   r   r   r  rd  ry  rp   rF   rE   <module>r     s   				  				                          , + + + + +             ; ; ; ; ; ; ; ; ; ;                                           % % % % % %                               % % % % % %             $ $ $ $ $ $       ) ) ) ) ) ) : : : : : : : :             K K K K K K K K K K K K K K K K " " " " " "                  g!!M M M M M M M M`    )   & 
68U	3	3    $gfo   @9 9 9 9 9L' 9 9 9xo o o o oL' o o od	c c c c c<& c c cLm m m m mL' m m m m mrF   