
    Ng                         d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  ej        e          Z ed          Z ed          Z  G d d          Z!dS )    N)BinaryIO	ContainerDictIteratorListOptionalTuple)Rect   )settings)PDFDocumentPDFTextExtractionNotAllowedPDFNoPageLabels)	PDFParser)PDFObjectNotFound)
dict_value)	int_value)
list_value)resolve1)LITPagePagesc                       e Zd ZdZdedededee         ddf
dZdefd	Z	h d
Z
ededed          fd            Ze	 	 	 	 	 ddedeee                  dededededed          fd            ZdS )PDFPageak  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).
    docpageidattrslabelreturnNc                 6   || _         || _        t          |          | _        || _        t          | j                            d                    | _        t          | j                            dt                                          | _	        t          | j        d                   | _
        d| j        v r t          | j        d                   | _        n| j
        | _        t          | j                            dd                    dz   dz  | _        | j                            d          | _        | j                            d	          | _        d
| j        v rt          | j        d
                   }ng }t!          |t"                    s|g}|| _        dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxRotater   ih  AnnotsBContentsN)r   r   r   r   r   r   getlastmoddict	resourcesmediaboxcropboxr   rotateannotsbeads
isinstancelistcontents)selfr   r   r   r   r4   s         L/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__-   sR    &&


~ > >??/7JNN;//0
 0
 'tz*'=>>
""!)$*Y*?!@!@DLL=DL !!<!<==CsJjnnX..Z^^C((
##
: 677HHH(D)) 	" zH&.    c                 B    d                     | j        | j                  S )Nz(<PDFPage: Resources={!r}, MediaBox={!r}>)formatr,   r-   )r5   s    r6   __repr__zPDFPage.__repr__O   s#    9@@NDM
 
 	
r8   >   r%   r$   r#   r"   documentc              #   .   	K   dt           dt          t          t           f         dt          t          t
          t          t           t          t           t           f         f         f                  f 	fd		                                 }n$# t          $ r t          j	        d           }Y nw xY wd}dj
        v rC 	j
        d         j
                  }|D ]$\  }}  ||t          |                    V  d}%|sj        D ]}|                                D ]v}	                     |          }t          |t                     r9|                    d          t$          u r  ||t          |                    V  g# t&          $ r Y sw xY wd S )	Nobjparentr   c              3     K   t          | t                    r7| }t          	                    |                                                    }n(| j        }t          |                                           }|                                D ]\  }}|j        v r	||vr|||<   |                    d          }|!t          j
        s|                    d          }|t          u rQd|v rMt                              d|d                    t          |d                   D ]} 
||          E d {V  d S |t          u r#t                              d|           ||fV  d S d S )NTypetypeKidszPages: Kids=%rzPage: %r)r2   intr   getobjcopyobjiditemsINHERITABLE_ATTRSr)   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)r>   r?   rG   treekv	tree_typecclsr<   searchs           r6   rU   z$PDFPage.create_pages.<locals>.searchX   s      #s## .!(//%"8"899>>@@ 	!#++-- ,,..    A---!4--DG((I   HHV,,	M))fnn		*DL999#DL11 / /A%va......../ /l**		*d+++dm##### +*r8   Fr   TrA   )objectr   strr   r	   rD   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrE   r2   r+   r)   rN   r   )
rT   r<   page_labelspagesobjectsrG   rO   xrefr>   rU   s
   ``       @r6   create_pageszPDFPage.create_pagesV   s     	$	$!%c6k!2	$eCfd66>.B&B!CCDE	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$6	13;3K3K3M3MKK 	1 	1 	1#*400KKK	1 h&&&fX-g68HIIG!(  c(E4k1B1BCCCCC 		   !__..  E&ooe44%c400 OSWWV__5T5T"%#hsD<M<M"N"NNNN,    	s%   7B B-,B- A#F
FFr    TFfppagenosmaxpagespasswordcachingcheck_extractablec              #   D  K   t          |          }t          |||          }|j        s5|rd|z  }	t          |	          d|z  }
t                              |
           t          |                     |                    D ]\  }}|r||vr|V  |r||dz   k    r nd S )N)rh   ri   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this caser   )r   r   is_extractabler   rL   warning	enumeraterc   )rT   re   rf   rg   rh   ri   rj   parserr   	error_msgwarning_msgpagenopages                r6   	get_pageszPDFPage.get_pages   s       2&8WEEE ! 	)  )@2E	1)<<<A DF	F  K((('(8(8(=(=>> 	 	NVT F'11JJJ H
22r8   )Nr   rd   TF)__name__
__module____qualname____doc__r   rV   r   rW   r7   r;   rI   classmethodr   rc   r   r   rD   boolrt    r8   r6   r   r      sC        * / /(. /7= /FNsm /	 /  /  /  /D
# 
 
 
 

 GFF1K 1HY4G 1 1 1 [1f  -1"'# ## )C.)# 	#
 # #  # 
)	# # # [# # #r8   r   )"rY   loggingtypingr   r   r   r   r   r   r	   pdfminer.utilsr
   rd   r   pdfdocumentr   r   r   	pdfparserr   pdftypesr   r   r   r   r   psparserr   	getLoggerru   rL   rN   rK   r   r{   r8   r6   <module>r      sz        M M M M M M M M M M M M M M M M M M             R R R R R R R R R R             ' ' ' ' ' '                                          g!! s6{{GW W W W W W W W W Wr8   