
    Ng!                         d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ  ed	          d
edefd            Ze G d d                      Z e            ZdS )a  
This module contains variables that can permitted to be tweaked by the system environment. For
example, model parameters that changes the output of an inference call. Constants do NOT belong in
this module. Constants are values that are usually names for common options (e.g., color names) or
settings that should not be altered without making a code change (e.g., definition of 1Gb of memory
in bytes). Constants should go into `./constants.py`
    N)	dataclass)	lru_cache)Path)Optional)OCR_AGENT_TESSERACT   )maxsizedirreturnc                 n    t          |           dt          j        d           z  }t          |          S )Nztmp/r   )r   osgetpgidstr)r
   tempdirs     _/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/utils/config.pyget_tempdirr      s/    3ii0A000Gw<<    c                   B   e Zd ZdZd Zd'dededefdZdededefdZdede	de	fd	Z
dededefd
ZdeddfdZedefd            Zedefd            Zede	fd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zede	fd            Zede	fd            Zede	fd            Zede	fd            Zede	fd            Zedefd            Zedefd            Z edefd             Z!edefd!            Z"edefd"            Z#edefd#            Z$edefd$            Z%ede&e	         fd%            Z'edefd&            Z(dS )(	ENVConfigz+class for configuring enviorment parametersc                 L    | j         r|                     | j                   d S d S N)GLOBAL_WORKING_DIR_ENABLED_setup_tmpdirGLOBAL_WORKING_PROCESS_DIRselfs    r   __post_init__zENVConfig.__post_init__   s7    * 	@t>?????	@ 	@r    vardefault_valuer   c                 B    t           j                            ||          S )zhattempt to get the value of var from the os environment; if not present return the
        default_value)r   environget)r   r   r    s      r   _get_stringzENVConfig._get_string!   s     z~~c=111r   c                 R    |                      |          x}rt          |          S |S r   )r$   intr   r   r    values       r   _get_intzENVConfig._get_int&   s/    $$S)))5 	u::r   c                 R    |                      |          x}rt          |          S |S r   )r$   floatr'   s       r   
_get_floatzENVConfig._get_float+   s/    $$S)))5 	 <<r   c                 `    |                      |          x}r|                                dv S |S )N)true1t)r$   lowerr'   s       r   	_get_boolzENVConfig._get_bool0   s7    $$S)))5 	7;;==$666r   tmpdirNc                 f    t          |                              dd           |t          _        d S )NT)parentsexist_ok)r   mkdirtempfiler   )r   r3   s     r   r   zENVConfig._setup_tmpdir5   s/    V4$777!r   c                 .    |                      dd          S )zRextra image content to add around an identified element region; measured in pixelsIMAGE_CROP_PADr   r)   r   s    r   r:   zENVConfig.IMAGE_CROP_PAD9   s     }}-q111r   c                 .    |                      dd          S )zextra image content to add around an identified table region; measured in pixels

        The padding adds image data around an identified table bounding box for downstream table
        structure detection model use as input
        TABLE_IMAGE_CROP_PADr   r;   r   s    r   r=   zENVConfig.TABLE_IMAGE_CROP_PAD>   s     }}3Q777r   c                 .    |                      dd          S )z%the quantile to check for text heightTESSERACT_TEXT_HEIGHT_QUANTILE      ?r,   r   s    r   r?   z(ENVConfig.TESSERACT_TEXT_HEIGHT_QUANTILEG   s     ?EEEr   c                 .    |                      dd          S )zminimum text height acceptable from tesseract OCR results

        if estimated text height from tesseract OCR results is lower than this value the image is
        scaled up to be processed again
        TESSERACT_MIN_TEXT_HEIGHT   r;   r   s    r   rC   z#ENVConfig.TESSERACT_MIN_TEXT_HEIGHTL   s     }}8"===r   c                 .    |                      dd          S )zmaximum text height acceptable from tesseract OCR results

        if estimated text height from tesseract OCR results is higher than this value the image is
        scaled down to be processed again
        TESSERACT_MAX_TEXT_HEIGHTd   r;   r   s    r   rF   z#ENVConfig.TESSERACT_MAX_TEXT_HEIGHTU   s     }}8#>>>r   c                 .    |                      dd          S )z%optimum text height for tesseract OCRTESSERACT_OPTIMUM_TEXT_HEIGHT   r;   r   s    r   rI   z'ENVConfig.TESSERACT_OPTIMUM_TEXT_HEIGHT^   s     }}<bAAAr   c                 .    |                      dd          S )z%API endpoint to use for Google VisionGOOGLEVISION_API_ENDPOINTr   r$   r   s    r   rL   z#ENVConfig.GOOGLEVISION_API_ENDPOINTc   s      ;R@@@r   c                 8    |                      dt                    S )zOCR Agent to use	OCR_AGENT)r$   r   r   s    r   rO   zENVConfig.OCR_AGENTh   s     -@AAAr   c                 .    |                      dd          S )zextra image block content to add around an identified element(`Image`, `Table`) region
        horizontally; measured in pixels
        'EXTRACT_IMAGE_BLOCK_CROP_HORIZONTAL_PADr   r;   r   s    r   rQ   z1ENVConfig.EXTRACT_IMAGE_BLOCK_CROP_HORIZONTAL_PADm   s    
 }}FJJJr   c                 .    |                      dd          S )zextra image block content to add around an identified element(`Image`, `Table`) region
        vertically; measured in pixels
        %EXTRACT_IMAGE_BLOCK_CROP_VERTICAL_PADr   r;   r   s    r   rS   z/ENVConfig.EXTRACT_IMAGE_BLOCK_CROP_VERTICAL_PADt   s    
 }}DaHHHr   c                 .    |                      dd          S )zCadds `table_as_cells` to a Table element's metadata when it is TrueEXTRACT_TABLE_AS_CELLSFr2   r   s    r   rU   z ENVConfig.EXTRACT_TABLE_AS_CELLS{   s     ~~6>>>r   c                 .    |                      dd          S )a9  threshold to determine if an OCR region is a sub-region of a given block
        when aggregating the text from OCR'd elements that lie within the given block

        When the intersection region area divided by self area is larger than this threshold self is
        considered a subregion of the other
        OCR_LAYOUT_SUBREGION_THRESHOLDr@   rA   r   s    r   rX   z(ENVConfig.OCR_LAYOUT_SUBREGION_THRESHOLD   s     ?EEEr   c                 .    |                      dd          S )Rthreshold to consider the bounding boxes of two embedded images as the same region$EMBEDDED_IMAGE_SAME_REGION_THRESHOLDg333333?rA   r   s    r   r[   z.ENVConfig.EMBEDDED_IMAGE_SAME_REGION_THRESHOLD   s     EsKKKr   c                 .    |                      dd          S )aA  threshold to determine if an embedded region is a sub-region of a given block
        when aggregating the text from embedded elements that lie within the given block

        When the intersection region area divided by self area is larger than this threshold self is
        considered a subregion of the other
        -EMBEDDED_TEXT_AGGREGATION_SUBREGION_THRESHOLDgGz?rA   r   s    r   r]   z7ENVConfig.EMBEDDED_TEXT_AGGREGATION_SUBREGION_THRESHOLD   s     NPTUUUr   c                 .    |                      dd          S )rZ   #EMBEDDED_TEXT_SAME_REGION_THRESHOLD?rA   r   s    r   r_   z-ENVConfig.EMBEDDED_TEXT_SAME_REGION_THRESHOLD   s     DcJJJr   c                 .    |                      dd          S )zThe threshold value (between 0.0 and 1.0) that determines the minimum overlap required
        for an annotation to be considered within the element.
        PDF_ANNOTATION_THRESHOLDr`   rA   r   s    r   rb   z"ENVConfig.PDF_ANNOTATION_THRESHOLD   s     93???r   c                 .    |                      dd          S )zBEnable usage of GLOBAL_WORKING_DIR and GLOBAL_WORKING_PROCESS_DIR.r   FrV   r   s    r   r   z$ENVConfig.GLOBAL_WORKING_DIR_ENABLED   s     ~~:EBBBr   c                 p    |                      dt          t          j                    dz                      S )z%Path to Unstructured cache directory.GLOBAL_WORKING_DIRz.cache/unstructured)r$   r   r   homer   s    r   re   zENVConfig.GLOBAL_WORKING_DIR   s/      4c$)++H]:]6^6^___r   c                     t          | j                  }|                     d|          }|dk    r|}| j        r|                     |           |S )zPath to Unstructured cache tempdir. Overrides TMPDIR, TEMP and TMP.
        Defaults to '{GLOBAL_WORKING_DIR}/tmp/{os.getpgid(0)}'.
        )r
   r   r   )r   re   r$   r   r   )r   default_tmpdirr3   s      r   r   z$ENVConfig.GLOBAL_WORKING_PROCESS_DIR   s_    
 %)@AAA!!">OOR<<#F* 	'v&&&r   c                 .    |                      dd          S )z)Analysis dump object detection skip flag.ANALYSIS_DUMP_OD_SKIPFrV   r   s    r   rj   zENVConfig.ANALYSIS_DUMP_OD_SKIP   s     ~~5u===r   c                 .    |                      dd          S )z(Analysis draw bboxes on pages skip flag.ANALYSIS_BBOX_SKIPFrV   r   s    r   rl   zENVConfig.ANALYSIS_BBOX_SKIP   s     ~~2E:::r   c                 .    |                      dd          S )z@Flag for drawing the analysis bboxes on a single image (as grid)ANALYSIS_BBOX_DRAW_GRIDFrV   r   s    r   rn   z!ENVConfig.ANALYSIS_BBOX_DRAW_GRID   s     ~~7???r   c                 .    |                      dd          S )zMFlag for drawing the caption above the analysed page (for e.g. layout source)ANALYSIS_BBOX_DRAW_CAPTIONTrV   r   s    r   rp   z$ENVConfig.ANALYSIS_BBOX_DRAW_CAPTION   s     ~~:DAAAr   c                 B    |                      dd          }|dk    rdS |S )zAnalaysis bbox resize valueANALYSIS_BBOX_RESIZEg      NrA   )r   resizes     r   rr   zENVConfig.ANALYSIS_BBOX_RESIZE   s+     !7>>T>>4r   c                 .    |                      dd          S )zJThe format for analysed pages with bboxes drawn on them. Default is 'png'.ANALYSIS_BBOX_FORMATpngrM   r   s    r   ru   zENVConfig.ANALYSIS_BBOX_FORMAT   s      6>>>r   )r   ))__name__
__module____qualname____doc__r   r   r$   r&   r)   r+   r,   boolr2   r   propertyr:   r=   r?   rC   rF   rI   rL   rO   rQ   rS   rU   rX   r[   r]   r_   rb   r   re   r   rj   rl   rn   rp   r   rr   ru    r   r   r   r      s       55@ @ @2 2s 23 2 2 2 2 2
C      
c % E    
S  $    
"C "D " " " " 2 2 2 2 X2 8c 8 8 8 X8 F F F F XF >3 > > > X> ?3 ? ? ? X? Bs B B B XB A3 A A A XA B3 B B B XB K K K K XK Is I I I XI ? ? ? ? X? F F F F XF Le L L L XL Vu V V V XV KU K K K XK @% @ @ @ X@ CD C C C XC `C ` ` ` X` 
C 
 
 
 X
 >t > > > X> ;D ; ; ; X; @ @ @ @ X@ BD B B B XB huo    X ?c ? ? ? X? ? ?r   r   )rz   r   r8   dataclassesr   	functoolsr   pathlibr   typingr   &unstructured.partition.utils.constantsr   r   r   r   
env_configr}   r   r   <module>r      s    
			  ! ! ! ! ! !                   F F F F F F 1S S    
 B? B? B? B? B? B? B? B?J Y[[


r   