
    Ng                     .   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZ dedee         fd	Zd
ee         de j        fdZdedeeee	f                  ddfdZej        dd         fd
ee         ddfdZedk    r e             dS dS )    N)defaultdictdeque)chain)AnyDefaultDictDictList   )PDFp_strreturnc                     d| v rKt          t          |                     d                    \  }}t          t	          ||dz                       S t          |           gS )N-r
   )mapintsplitlistrange)r   startends      J/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfplumber/cli.pyparse_page_specr      sT    
e||ekk#..//
sE%q))***E

|    args_rawc                 R   t          j        d          }|                    ddt          j        d          t          j        j                   |                                }|                    ddd	           |                    d
dd	           |                    dddgd           |                    dd           |                    ddd           |                    ddd           |                    dt          j	                   |                    dt                     |                    ddt                     |                    dt          d           |                    |           }|j         t          t          |j                   |_        |S ) N
pdfplumberinfile?rb)nargstypedefaultz--structurezoWrite the structure tree as JSON.  All other arguments except --pages, --laparams, and --indent will be ignored
store_true)helpactionz--structure-textzWrite the structure tree as JSON including text contents.  All other arguments except --pages, --laparams, and --indent will be ignoredz--formatcsvjson)choicesr"   z--types+)r    z--include-attrsz1Include *only* these object attributes in output.)r    r$   z--exclude-attrsz,Exclude these object attributes from output.z
--laparams)r!   z--precisionz--pages)r    r!   z--indentz&Indent level for JSON pretty-printing.)r!   r$   )argparseArgumentParseradd_argumentFileTypesysstdinbufferadd_mutually_exclusive_groupr'   loadsr   r   
parse_argspagesr   r   )r   parsergroupargss       r   r3   r3      s   $\22F
("3D"9"939CS     //11E	W	     
W	     
UFOUKKK
	---
@     ;     4:666
C000
	?CCC
#K     X&&Dz%,--
Kr   pdfdatac                   	 t          d           }| j        D ]G}||j                 	|j        D ]0}|                    d          }|	|xx         |d         z  cc<   1Ht          |          }|rr|                                }d|v r|                    |d                    |                    d          }|M||         	d|v r	fd|d         D             |d<   |pd S d S )Nc                  *    t          t                    S )N)r   str r   r   <lambda>z#add_text_to_mcids.<locals>.<lambda>I   s    {3?O?O r   mcidtextchildrenpage_numbermcidsc                      g | ]
}|         S r=   r=   ).0r?   text_contentss     r   
<listcomp>z%add_text_to_mcids.<locals>.<listcomp>[   s    FFF$--FFFr   )r   r4   rB   charsgetr   popleftextend)
r8   r9   page_contentspagecr?   delpagenorF   s
            @r   add_text_to_mcidsrR   H   s6   +67O7O+P+PM	 - -%d&67 	- 	-A55==D|$1V9,		-
 	dA
 	GYY[[HHR
^$$$&&>%f-b==FFFF"W+FFFBvJ  	G 	G 	G 	G 	Gr   c           	         t          |           }t          j        |j        |j        |j                  5 }|j        r.t          t          j	        |j
        |j                             n|j        rA|j
        }t          ||           t          t          j	        ||j        d                     n|j        dk    r9|                    t           j        |j        |j        |j        |j                   n>|                    t           j        |j        |j        |j        |j        |j                   d d d            d S # 1 swxY w Y   d S )N)r4   laparams)indentF)rU   ensure_asciir&   )	precisioninclude_attrsexclude_attrs)rW   rX   rY   rU   )r3   r   openr   r4   rT   	structureprintr'   dumpsstructure_treerU   structure_textrR   formatto_csvr.   stdouttypesrW   rX   rY   to_json)r   r7   r8   trees       r   mainrf   ^   s   hD	$+TZ$-	H	H	H C> 	$*S/DDDEEEE  	%Dc4((($*T$+EJJJKKKK[E!!JJ

."0"0      KK

."0"0{    !                 s   D EE
E__main__)r*   r'   r.   collectionsr   r   	itertoolsr   typingr   r   r   r	   r8   r   r<   r   r   	Namespacer3   rR   argvrf   __name__r=   r   r   <module>rn      sk     



 * * * * * * * *       / / / / / / / / / / / /      3 49    1c 1x'9 1 1 1 1hG3 Gd4S>&: Gt G G G G,  #x|  49     : zDFFFFF r   