
    NgD                     V   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZmZmZmZmZ dedee         fd	Zd
efdZdddddddddded
ededee         dee         dededededee         defdZ	 	 	 	 	 ddededee         dededee         defdZdS )    N)BytesIO)Path)Optional)AnalysisDrawerFinalLayoutDrawerLayoutDrawerOCRLayoutDrawerODModelLayoutDrawerPdfminerLayoutDrawer)ExtractedLayoutDumperFinalLayoutDumperJsonLayoutDumperLayoutDumperObjectDetectionLayoutDumperOCRLayoutDumperdumperreturnc                    t          | t                    r"t          |                                           S t          | t                    r"t          |                                           S t          | t                    r"t          |                                           S t          | t                    r"t          |                                           S t          d|            )zFor a given layout dumper, return the corresponding layout drawer instance initialized with
    a dumped layout dict.

    Args:
        dumper: The layout dumper instance

    Returns:
        LayoutDrawer: The corresponding layout drawer instance
    layout_dumpzUnknown dumper type: )
isinstancer   r
   dumpr   r   r   r	   r   r   
ValueError)r   s    k/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/pdf_image/analysis/tools.py_get_drawer_for_dumperr      s     &566 	;"v{{}}====	F1	2	2 ;#>>>>	FO	,	, ;6;;==9999	F-	.	. ; V[[]];;;;999:::    is_imagec                 ^    t          j                    j        dd         }| rd| dS d| dS )ziGenerate a filename for the analysis artifacts based on the file type.
    Adds a random uuid suffix
    N   image_z.pngpdf_z.pdf)uuiduuid4hex)r   suffixs     r   _generate_filenamer&   /   sE     Z\\bqb!F %$$$$$&r   FTpng)filenamefileskip_bboxesskip_dump_od	draw_griddraw_captionresizeformatlayout_dumpersanalyzed_image_output_dir_pathr(   r)   r*   r+   r,   r-   r.   r/   c        
   
         |st          |           }|s|rdS t          |          }|                    dd           |s?t          ||          }|
D ]}|                    |           |                                 |sVt          ||| |||||	          }|
D ]&}t          |          }|                    |           '|                                 dS dS )a  Save the analysis artifacts for a given file. Loads some settings from
    the environment configuration.

    Args:
        layout_dumpers: The layout dumpers to save and use for bboxes rendering
        is_image: Flag for the file type (pdf/image)
        analyzed_image_output_dir_path: The directory to save the analysis artifacts
        filename: The filename of the sources analyzed file (pdf/image).
            Only one of filename or file should be provided.
        file: The file object for the analyzed file.
            Only one of filename or file should be provided.
        draw_grid: Flag for drawing the analysis bboxes on a single image (as grid)
        draw_caption: Flag for drawing the caption above the analyzed page (for e.g. layout source)
        resize: Output image resize value. If not provided, the image will not be resized.
        format: The format for analyzed pages with bboxes drawn on them. Default is 'png'.
    NTparentsexist_ok)r(   save_dir)r(   r)   r   r6   r,   r-   r.   r/   )	r&   r   mkdirr   add_layout_dumperprocessr   r   
add_drawer)r   r1   r(   r)   r*   r+   r,   r-   r.   r/   r0   output_pathjson_layout_dumperlayout_dumperanalysis_drawerdrawers                   r   save_analysis_artifiactsr@   9   s>   :  0%h// l 566KdT222 %- 
 
 
 , 	@ 	@M00????""$$$ "( %	
 	
 	
 , 	/ 	/M+M::F&&v....!!!!!" "r   renders_output_dir_pathc           	      2   t          |           j        }t          |           j                            d           }t          |          dz  |z  dz  }	|	                                sdS g }
|	                                D ]}|                                st          |          5 }t          j	        |          }ddd           n# 1 swxY w Y   |j        dk    r#|

                    t          |                     |j        dk    r#|

                    t          |                     |j        dk    r#|

                    t          |                     |j        d	k    r#|

                    t          |                     |
r|st          |          dz  |z  d
z  }nt          |          }|                    dd           t!          | ||||||          }|
D ]}|                    |           |                                 dS dS )a  Render the bounding boxes for a given layout dimp file.
    To be used for analysis after the partition is performed for
    only dumping the layouts - the bboxes can be rendered later.

    Expects that the analyzed_image_output_dir_path keeps the structure
    that was created by the save_analysis_artifacts function.

    Args:
        filename: The filename of the sources analyzed file (pdf/image)
        analyzed_image_output_dir_path: The directory where the analysis artifacts
          (layout dumps) are saved. It should be the root directory of the structure
          created by the save_analysis_artifacts function.
        renders_output_dir_path: Optional directory to save the rendered bboxes -
          if not provided, it will be saved in the analysis directory.
        draw_grid: Flag for drawing the analysis bboxes on a single image (as grid)
        draw_caption: Flag for drawing the caption above the analyzed page (for e.g. layout source)
        resize: Output image resize value. If not provided, the image will not be resized.
        format: The format for analyzed pages with bboxes drawn on them. Default is 'png'.
    pdfanalysisr   Nfinalr   object_detectionocrpdfminerbboxesTr3   )r(   r6   r   r,   r-   r.   r/   )r   stemr%   endswithexistsiterdiris_fileopenjsonloadappendr   r
   r	   r   r7   r   r:   r9   )r(   r1   rA   r,   r-   r.   r/   filename_stemr   analysis_dumps_dirlayout_drawersanalysis_dump_filenamefr   r;   r>   r?   s                    r   render_bboxes_for_filerX   x   s   8 NN'M>>(11%888H+,,z9MIMY  $$&& N"4"<"<">"> Q Q%--// 	()) 	'Q)A,,K	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	'!&'11!!"3"L"L"LMMM!&*<<<!!"5+"N"N"NOOO!&%//!!/k"J"J"JKKK!&*44!!"6;"O"O"OPPP "& 	8344zAMQT\\ K 677K$666( %
 
 
 % 	/ 	/F&&v....!!!!!)" "s   (C		C	C	)NFTNr'   )rP   r"   ior   pathlibr   typingr   <unstructured.partition.pdf_image.analysis.bbox_visualisationr   r   r   r	   r
   r   5unstructured.partition.pdf_image.analysis.layout_dumpr   r   r   r   r   r   r   boolr&   strfloatr@   rX    r   r   <module>rb      sk                                                    ;< ;H\4J ; ; ; ;,     #""<" <" <"!<"<" %(<" sm	<"
 7
<" <" <" <" <" UO<" <" <" <" <"D .2"F" F"F"$'F" &c]F" 	F"
 F" UOF" F" F" F" F" F" F"r   