
    Ng8                     0   d Z ddlZddlZddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z#m$Z$m%Z% ddl&m'Z'  ej(                      ej)        e*          Z+ ej,        d          Z-dee.e/f         de.fdZ0d*dede1dee.         ddfdZ2	 d+dedede3ddfdZ4	 	 d,dededee.         de3ddf
dZ5	 	 	 	 d-dede.dedee6         de.de3dee.         d ee.         ddfd!Z7 e%d"          Z8 e%d#          Z9de.de.d e.ddfd$Z:	 	 	 	 	 d.dede.de
e6         dee6         de.de3dee.         d ee.         de3ddfd%Z;defd&Z<d*d'eee.                  ddfd(Z=e*d)k    r e=             dS dS )/z#Extract pdf structure in XML format    N)	Any	ContainerDictIterableListOptionalTextIOUnioncast)ArgumentParser)PDFDocumentPDFNoOutlinesPDFXRefFallback)PDFPage)	PDFParser)PDFObjectNotFoundPDFValueError)	PDFStream	PDFObjRefresolve1stream_value)	PSKeyword	PSLiteralLIT)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                     t          | t                    rt          | d          }n| }t                              d |          S )Nzlatin-1c                 L    dt          |                     d                    z  S )Nz&#%d;r   )ordgroup)ms    P/var/www/html/ai-engine/env/lib/python3.11/site-packages/../../../bin/dumppdf.py<lambda>zescape.<locals>.<lambda>   s    3qwwqzz??!:     )
isinstancebytesstrESC_PATsub)r   uss     r#   escaper,      sB    !U I;;::B???r%   outobjcodecc                    ||                      d           d S t          |t                    r|                      dt          |          z             |                                D ]W\  }}|                      d|z             |                      d           t          | |           |                      d           X|                      d           d S t          |t                    rf|                      dt          |          z             |D ]'}t          | |           |                      d           (|                      d	           d S t          |t          t          f          r6|                      d
t          |          t          |          fz             d S t          |t                    r |dk    r(|                      |                                           n|dk    r(|                      |                                           n|                      d           t          | |j                   |                      d           |dk    rH|                                }|                      dt          |          t          |          fz             |                      d           d S t          |t                    r|                      d|j        z             d S t          |t                     r|                      d|j        z             d S t          |t$                    r|                      d|j        z             d S t'          |          r|                      d|z             d S t)          |          )Nz<null />z<dict size="%d">
z<key>%s</key>
z<value>z	</value>
z</dict>z<list size="%d">

z</list>z<string size="%d">%s</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="%d">%s</data>
z	</stream>z<ref id="%d" />z<keyword>%s</keyword>z<literal>%s</literal>z<number>%s</number>)writer&   dictlenitemsdumpxmllistr(   r'   r,   r   get_rawdataget_dataattrsr   objidr   namer   r   	TypeError)r-   r.   r/   kvdatas         r#   r9   r9   !   s=   
{		*#t 		&S1222iikk 	$ 	$FQII'!+,,,IIi   COOOIIl####		)#t 		&S1222 	 	ACOOOIIdOOOO		)#U|$$ 		1SXXvc{{4KKLLL#y!! E>>IIcoo''((((hIIcllnn%%%%II+,,,C###IIn%%%||~~		73t99fTll:SSTTTIIk"""#y!! 		#ci/000#y!! 		)CH4555#y!! 		)CH4555}} 		'#-...
C..r%   Fdocshow_fallback_xrefc                 \   |j         D ]e}t          |t                    r|rL|                     d           t	          | |                                           |                     d           ft          d |j         D                       }|r|sd}t                              |           d S )Nz
<trailer>
z
</trailer>

c              3   @   K   | ]}t          |t                    V  d S N)r&   r   ).0xrefs     r#   	<genexpr>zdumptrailers.<locals>.<genexpr>j   s,      KK:dO44KKKKKKr%   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr&   r   r5   r9   get_trailerallloggerwarning)r-   rD   rE   rJ   no_xrefsmsgs         r#   dumptrailersrS   b   s     	 * *$00 	*4F 	*IIm$$$C))++,,,II()))KKKKKKKH * $ 	
 	s
Fr%   c                    t                      }|                     d           |j        D ]}|                                D ]}||v r|                    |           	 |                    |          }|5|                     d|z             t          | ||           |                     d           u# t          $ r}t          d|z             Y d }~d }~ww xY wt          | ||           |                     d           d S )Nz<pdf>z<object id="%d">
r/   z
</object>

znot found: %rz</pdf>)
setr5   rL   
get_objidsaddgetobjr9   r   printrS   )	r-   rD   r/   rE   visitedrJ   r>   r.   es	            r#   dumpallobjsr]   u   s<    eeGIIg	 + +__&& 	+ 	+EKK+jj'';		.6777S....		+,,,,$ + + +o)********+	+ c-...IIh
Fs   B66?B66
C CC outfpfnameobjidspagenospassworddumpall
extractdirc                 t   t          |d          }t          |          }	t          |	|          d t          t	          j                  d          D             }
dt          dt          ffd}	                                 }| 	                    d           |D ]K\  }}}}}d }|r ||          }|
|d         j
                 }n||rz|}t          |t                    rc|                    d	          }|rLt          |          d
k    r9|                    d          r$ ||d                   }|
|d         j
                 }t          |          }| 	                    d                    ||                     |:| 	                    d           t#          | |           | 	                    d           || 	                    d|z             | 	                    d           M| 	                    d           n# t$          $ r Y nw xY w|	                                 |                                 d S )Nrbc                 $    i | ]\  }}|j         |S  )pageid)rI   pagenopages      r#   
<dictcomp>zdumpoutline.<locals>.<dictcomp>   s0       VT 	V  r%      destr   c                    t          | t          t          f          r#t                              |                     } n<t          | t
                    r't                              | j                            } t          | t                    r| d         } t          | t                    r| 	                                } | S )ND)
r&   r(   r'   r   get_destr   r?   r6   r   resolve)ro   rD   s    r#   resolve_destz!dumpoutline.<locals>.resolve_dest   s    dS%L)) 	5CLL..//DDi(( 	5CLL3344DdD!! 	9DdI&& 	"<<>>Dr%   z<outlines>
r   Sz/'GoTo'rq   z"<outline level="{!r}" title="{}">
z<dest>z</dest>
z<pageno>%r</pageno>
z</outline>
z</outlines>
)openr   r   	enumerater   create_pagesobjectr   get_outlinesr5   r>   r&   r6   getreprr,   formatr9   r   close)r_   r`   ra   rb   rc   rd   r/   re   fpparserpagesrt   outlinesleveltitlero   aserk   actionsubtyper   rD   s                         @r#   dumpoutliner      sd    
eT		Br]]F
fh
'
'C '(<S(A(A1EE  E
	6 	c 	 	 	 	 	 	##%%N###+3 	( 	('UE4BF 	6#|D))tAw}- 6fd++ 6$jjooG 64==I#=#=&**S//#=+|F3K88!&tAw}!5uAKK=DDUANNOOOH%%%t$$$K(((!3f<===KK''''O$$$$   
LLNNNHHJJJ
Fs   2FH   
HHFilespecEmbeddedFilec                    
 dt           dt          t          t          f         dd f
fd}t	          | d          5 }t          |          }t          ||          
t                      }
j        D ]}|	                                D ]m}

                    |          }	||vrRt          |	t                    r=|	                    d          t          u r!|                    |            |||	           n	 d d d            n# 1 swxY w Y   d S )Nr>   r.   r   c                    t           j                            |                    d          p9t	          t
          |                    d                                                              }|d                             d          p|d                             d          }                    |j                  }t          |t                    sd|z  }t          |          |                    d          t          urt          d|z            t           j                            	d| |fz            }t           j                            |          rt          d|z            t!          d	|z             t          j        t           j                            |          d
           t'          |d          }|                    |                                           |                                 d S )NUFFEFz:unable to process PDF: reference for %r is not a PDFStreamTypez>unable to process PDF: reference for %r is not an EmbeddedFilez%.6d-%szfile exists: %rzextracting: %rT)exist_okwb)ospathbasenamer{   r   r'   decoderY   r>   r&   r   r   LITERAL_EMBEDDEDFILEjoinexistsIOErrorrZ   makedirsdirnamerv   r5   r<   r~   )
r>   r.   filenamefilereffileobj	error_msgr   r-   rD   re   s
           r#   extract1z!extractembedded.<locals>.extract1   s   7##CGGDMM$WT%5N5N5U5U5W5WXXd)--%%;Ts););**W]++'9-- 	+&'   	***;;v&:::),46   w||J	UH4E(EFF7>>$ 	4+d2333%&&&
BGOOD))D99994		'""$$%%%		r%   rg   r   )intr   r(   r   rv   r   r   rV   rL   rW   rY   r&   r6   r{   LITERAL_FILESPECrX   )r`   rc   re   r   r   r   extracted_objidsrJ   r>   r.   rD   s     `       @r#   extractembeddedr      ss    $sCx. T       2 
eT		 )b2&(++55I 		) 		)D** ) )jj''!111"3-- 2+;;;$((///HUC((()		)	) ) ) ) ) ) ) ) ) ) ) ) ) ) ) Fs   B:DD
Dc	                 P   t          |d          }	t          |	          }
t          |
|          }|r,|D ])}|                    |          }t	          | ||           *|rnt          t          j        |                    D ]L\  }}||v rC|r,|j        D ]#}t          |          }t	          | ||           $7t	          | |j
                   M|rt          | |||           |s|s|st          | ||           |	                                 |dvr|                     d           d S )Nrg   rU   )r2   r3   r1   )rv   r   r   rY   r9   rw   r   rx   contentsr   r=   r]   rS   r~   r5   )r_   r`   ra   rb   rc   rd   r/   re   rE   r   r   rD   r>   r.   rk   rl   s                   r#   dumppdfr      st    
eT		Br]]F
fh
'
'C - 	- 	-E**U##CE3e,,,,, /'(<S(A(ABB 	/ 	/NVT   /#} 9 9*3//s%888889 E4:... ;E3'9::: 5W 5w 5UC!3444HHJJJ%%%D
Fr%   c                  ^   t          t          d          } |                     dt          d dd           |                     ddd	d
                    t
          j                             |                     ddddd           |                                 }|                    ddddd           |                    ddt          d           |                     dd          }|                    dt          d dd           |                    ddt          d            |                    d!d"t          d#           |                    d$d%ddd&           |                    d'dd()           |                    d*d+t          d,d-.           |                     d/d0          }|                    d1d2t          d3d4.           |                                }|                    d5d6ddd7           |                    d8d9ddd:           |                    d;d<ddd=           | S )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v{})r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-Pr^   z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-o-zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)
r   __doc__add_argumentr(   r}   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )r   procedure_parserparse_paramsoutput_paramscodec_parsers        r#   create_parserr   !  s5   $???F
.     "))(*>??	     '     ::<<!!+ "    !!d3K "    ,,7 -  L ?     	     @	     B     7     ;     --> .  M      !==??L4     8     1     Mr%   argvc                 l   t                      }|                    |           }|j        r0t          j                                        t          j                   |j        dk    rt          j	        }nt          |j        d          }|j        r%d |j                            d          D             }ng }|j        rd |j        D             }n:|j        r%d |j                            d          D             }nt                      }|j        }|j        rd}n|j        rd	}n|j        rd
}nd }|j        D ]g}|j        rt-          ||||||j        |d            &|j        rt3          |||j                   Et5          ||||||j        |d |j        	  	         h|                                 d S )N)argsr   wc                 ,    g | ]}t          |          S ri   r   rI   xs     r#   
<listcomp>zmain.<locals>.<listcomp>  s    :::Q#a&&:::r%   ,c                     h | ]}|d z
  S rn   ri   r   s     r#   	<setcomp>zmain.<locals>.<setcomp>  s    444Q1q5444r%   c                 2    h | ]}t          |          d z
  S r   r   r   s     r#   r   zmain.<locals>.<setcomp>  s"    ???!3q66A:???r%   r2   r3   r4   )rc   rd   r/   re   )rc   re   )rc   rd   r/   re   rE   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGoutfilesysstdoutrv   objectssplitpage_numbersrb   rV   rc   
raw_streambinary_streamtext_streamr   extract_tocr   rN   extract_embeddedr   r   rE   r~   )	r   r   r   r_   ra   rb   rc   r/   r`   s	            r#   mainr     s
   __F$''Dz 4$$W]333|s
T\3''| ::$,"4"4S"9"9::: 44$"3444	 ??t|'9'9#'>'>???%%}H $		 		    	!	 	 	 	 	 " 	EHAVWWWWW!#'#:
 
 
 
 
 
KKMMMMMr%   __main__rH   )F)NF)r^   FNN)r^   FNNF)>r   r   os.pathr   rer   typingr   r   r   r   r   r   r	   r
   r   argparser   r   pdfminer.pdfdocumentr   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rO   compiler)   r(   r'   r,   ry   r9   boolrS   r]   r   r   r   r   r   r   r   r   ri   r%   r#   <module>r      sY   ) )   				 



 V V V V V V V V V V V V V V V V V V V V V V # # # # # #  L L L L L L L L L L $ $ $ $ $ $ ( ( ( ( ( ( > > > > > > > > J J J J J J J J J J J J 7 7 7 7 7 7 7 7 7 7 # # # # # #     		8	$	$
"*>
?
?@eCJ @C @ @ @ @> > >f >Xc] >d > > > >D ?D 	!7;	   ,  $	 		 C= 	
 
   @  $: ::: : s^	:
 : : C=: : 
: : : :z 3z?? s>** (3 (# (3 (4 ( ( ( (`  $$" """ SM" s^	"
 " " C=" " " 
" " " "Js~ s s s sl> >xS	" >d > > > >B zDFFFFF r%   