
    Ngl              #          d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlm Z m!Z!m"Z" 	 	 	 	 	 	 	 	 	 	 	 	 	 d*dede"de#de#de
e         de$de
ee$                  de#de%de$de#de
e#         de&d e&d!e&d"ed#df"d$Z'	 	 	 	 	 	 d+d&e!de#de
ee$                  de$d'e&de#de
e         d#e#fd(Z(	 	 	 	 	 d,d&e!de#de
ee$                  de$d'e&de
e         d#e	e         fd)Z)dS )-zIFunctions that can be used for the most common use-cases for pdfminer.six    N)StringIO)AnyBinaryIO	ContainerIteratorOptionalcast   )XMLConverterHTMLConverterTextConverterPDFPageAggregatorHOCRConverter)ImageWriter)LAParamsLTPage)	PDFDeviceTagExtractor)PDFResourceManagerPDFPageInterpreter)PDFPage)open_filename
FileOrNameAnyIOtextutf-8       ?normalFinfoutfpoutput_typecodeclaparamsmaxpagespage_numberspasswordscalerotation
layoutmode
output_dirstrip_controldebugdisable_cachingkwargsreturnc           	      :   |r0t          j                                        t           j                   d}|rt	          |          }t          |           }d}|dk    r!|t          j        k    rt          j        j        }|dk    rt          |||||          }n|dk    rt          ||||||          }nx|dk    rt          |||||
||          }n[|d	k    rt          |||||
          }n@|dk    r&t          |t          t          |          |          }nd| }t!          |          |J t#          ||          }t%          j        | ||||           D ])}|j        |	z   dz  |_        |                    |           *|                                 dS )ak  Parses text from inf-file and writes to outfp file-like object.

    Takes loads of optional arguments but the defaults are somewhat sane.
    Beware laparams: Including an empty LAParams is not the same as passing
    None!

    :param inf: a file-like object to read PDF structure from, such as a
        file handler (using the builtin `open()` function) or a `BytesIO`.
    :param outfp: a file-like object to write the text to.
    :param output_type: May be 'text', 'xml', 'html', 'hocr', 'tag'.
        Only 'text' works properly.
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. Default is None
        but may not layout correctly.
    :param maxpages: How many pages to stop parsing after
    :param page_numbers: zero-indexed page numbers to operate on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param scale: Scale factor
    :param rotation: Rotation factor
    :param layoutmode: Default is 'normal', see
        pdfminer.converter.HTMLConverter
    :param output_dir: If given, creates an ImageWriter for extracted images.
    :param strip_control: Does what it says on the tin
    :param debug: Output more logging data
    :param disable_caching: Does what it says on the tin
    :param other:
    :return: nothing, acting as it does on two streams. Use StringIO to get
        strings.
    Ncachingr   )r#   r$   imagewriterxml)r#   r$   r4   stripcontrolhtml)r#   r(   r*   r$   r4   hocr)r#   r$   r6   tag)r#   z1Output type can be text, html, xml or tag but is r%   r'   r3   ih  )logging	getLoggersetLevelDEBUGr   r   sysstdoutbufferr   r   r   r   r   r	   r   
ValueErrorr   r   	get_pagesrotateprocess_pageclose)r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r4   rsrcmgrdevicemsginterpreterpages                         O/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfminer/high_level.pyextract_text_to_fprM      s   ^  4$$W]333K .!*-- _)<===G"&Ff#*!4!4
!fU%(
 
 
 
		#&
 
 
 
		!#
 
 
 
		U%(
 
 
 
		gtHe'<'<EJJJ TkSSoo$Wf55K!##   ' ' {X-4  &&&&
LLNNNNN    Tpdf_filer3   c           	         |t                      }t          | d          5 }t                      5 }t          t          |          }t          |          }	t          |	|||          }
t          |	|
          }t          j	        |||||          D ]}|
                    |           |                                cddd           cddd           S # 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )aw  Parse and return the text contained in a PDF file.

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: a string containing all of the text extracted.
    Nrbr2   )r#   r$   r:   )r   r   r   r	   r   r   r   r   r   rC   rE   getvalue)rO   r'   r&   r%   r3   r#   r$   fpoutput_stringrG   rH   rJ   rK   s                rL   extract_textrU      s   , ::	x	&	& ("hjj (M(B$W555wUXVVV(&99%
 
 
 	+ 	+D $$T****%%''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (s5   C.BC=C.C	C.C	C..C25C2c              #     K   |t                      }t          | d          5 }t          t          |          }t	          |          }t          ||          }t          ||          }	t          j        |||||          D ]/}
|		                    |
           |
                                }|V  0	 ddd           dS # 1 swxY w Y   dS )a  Extract and yield LTPage objects

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: LTPage objects
    NrQ   r2   )r$   r:   )r   r   r	   r   r   r   r   r   rC   rE   
get_result)rO   r'   r&   r%   r3   r$   rS   resource_managerrH   rJ   rK   layouts               rL   extract_pagesrZ      s.     ( ::	x	&	& 
"(B-g>>>"#3hGGG()96BB%x(G
 
 
 	 	D $$T***&&((FLLLL	
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   BCCC)r   r   Nr   Nr   r   r   r   NFFF)r   Nr   Tr   N)r   Nr   TN)*__doc__r;   r?   ior   typingr   r   r   r   r   r	   	converterr   r   r   r   r   imager   rY   r   r   	pdfdevicer   r   	pdfinterpr   r   pdfpager   utilsr   r   r   strintfloatboolrM   rU   rZ    rN   rL   <module>ri      sS   O O  



       E E E E E E E E E E E E E E E E                    $ $ $ $ $ $ $ $ . . . . . . . . = = = = = = = =       3 3 3 3 3 3 3 3 3 3 #'-1 $!o o	oo o 	o
 x o o 9S>*o o o o o o o o o  !o" 
#o o o oh -1#'(( (((((( 9S>*(( 	((
 (( (( x (( 	(( (( (( ((Z -1#'! !!! 9S>*! 	!
 ! x ! f! ! ! ! ! !rN   