
    Ng                         d dl mZmZ d dlmZ d dlmZ d dlmZ dde	de
fdZ	 	 	 	 	 dde	de
d
e
de
de
deee	                  fdZd Z	 	 	 dd
e
de
de
fdZd	S )    )ListOptional)logger)PartitionStrategy)dependency_existsFstrategyis_imagec                     t           j        t           j        t           j        t           j        g}| |vrt          |  d          | t           j        k    r|rt          d          dS dS )z?Determines if the strategy is valid for the specified filetype.z is not a valid strategy.z3The fast strategy is not available for image files.N)r   AUTOFASTOCR_ONLYHI_RES
ValueError)r   r	   valid_strategiess      ]/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/strategies.pyvalidate_strategyr      sz     	" 	 '''H???@@@$)))h)NOOO *)))    Npdf_text_extractableinfer_table_structureextract_images_in_pdfextract_image_block_typesc                    t          d          }t          d          }| t          j        k    r4|pt          |          }|rt	                      } nt          |||          } t          | | | g          rt          d          | t          j        k    rX|sVt          j
        d           |r t          j
        d           t          j        S t          j
        d           t          j        S | t          j        k    rX|sVt          j
        d           |r t          j
        d           t          j        S t          j
        d	           t          j        S | S )
zDetermines what strategy to use for processing PDFs or images, accounting for fallback
    logic if some dependencies are not available.unstructured_pytesseractunstructured_inferencer   r   extract_elementzunstructured_inference is not installed, pytesseract is not installed and the text of the PDF is not extractable. To process this file, install unstructured_inference, install pytesseract, or remove copy protection from the PDF.zunstructured_inference is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with another strategy.z+Falling back to partitioning with ocr_only.z'Falling back to partitioning with fast.zpytesseract is not installed. Cannot use the ocr_only partitioning strategy. Falling back to partitioning with another strategy.z)Falling back to partitioning with hi_res.)r   r   r   bool_determine_image_auto_strategy_determine_pdf_auto_strategyallr   r   r   warningr   r   )	r   r	   r   r   r   r   pytesseract_installed unstructured_inference_installedr   s	            r   determine_pdf_or_image_strategyr$      s    ..HII'89Q'R'R$$)))/R48Q3R3R 	577HH3%9&; /  H -	-3H/HNbJbc  
 6
 
 	
 $+++4T+L	
 	
 	
 ! 	*NHIII$--NDEEE$))	&/	/	/8M	/L	
 	
 	
   	,NDEEE$))NFGGG$++Or   c                      t           j        S )zWIf "auto" is passed in as the strategy, determines what strategy to use
    for images.)r   r    r   r   r   r   W   s     ##r   r   c                 V    |s|rt           j        S | rt           j        S t           j        S )zUIf "auto" is passed in as the strategy, determines what strategy to use
    for PDFs.)r   r   r   r   r   s      r   r   r   ^   s6      ( ( '' * %% ))r   )F)FFFFN)FFF)typingr   r   unstructured.loggerr   &unstructured.partition.utils.constantsr   unstructured.utilsr   strr   r   r$   r   r   r&   r   r   <module>r-      sD   ! ! ! ! ! ! ! ! & & & & & & D D D D D D 0 0 0 0 0 0P P Pt P P P P$ !&"'"'59< <<< <  	<
  <  (S	2< < < <~$ $ $ "'"'!* *** * * * * * *r   