
    NgY
                    ~    d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	  e	dg          dd            Z
	 dddZdS )    )annotationsN)IO)exactly_one)requires_dependenciespypandocfilenamestrsource_formattarget_formatreturnc                L   ddl }	 |                    | ||          }n# t          $ r}d| }t          |          d}~wt          $ rW}|                                \  }}|dk    r||vrd}	nd}	| d|	 d	|                                 d
}t          |          d}~ww xY w|S )z@Uses pandoc to convert the source document to a raw text string.r   N)formatzError converting the file to text. Ensure you have the pandoc package installed on your system. Installation instructions are available at https://pandoc.org/installing.html. The original exception text was:
rtfzSupport for RTF files is not available in the current pandoc installation. It was introduced in pandoc 2.14.2.
Reference: https://pandoc.org/releases.html#pandoc-2.14.2-2021-08-21 z

z

Current version of pandoc: z
Make sure you have the right version installed in your system. Please follow the pandoc installation instructions in README.md to install the right version.)r   convert_fileFileNotFoundErrorRuntimeErrorget_pandoc_formatsget_pandoc_version)
r   r
   r   r   texterrmsgsupported_source_formats_additional_infos
             c/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/file_utils/file_conversion.pyconvert_file_to_textr      s*    OOO $$X}]$SS % % %\VY\ \ 	
  $$$      &.&A&A&C&C# !E!!m;S&S&SW O !O  [ [ [ [*2*E*E*G*G[ [ [ 	 3% ( Ks    
B!=B!
ABB!
str | NonefileIO[bytes] | Nonec                   t          ||           |t          j                    5 }t          j                            |d|            }t          |d          5 }|                    |                                           ddd           n# 1 swxY w Y   t          || d          cddd           S # 1 swxY w Y   |J t          || d          S )znConverts a document to HTML raw text.

    Enables the doucment to be processed using `partition_html()`.
    )r   r   Nz	tmp_file.wbhtml)r   r
   r   )
r   tempfileTemporaryDirectoryospathjoinopenwritereadr   )r
   r   r   temp_dir_pathtmp_file_pathtmp_files         r   &convert_file_to_html_text_using_pandocr/   0   sc    ----(** 	mGLL8SM8S8STTMmT** ,htyy{{+++, , , , , , , , , , , , , , ,'&mSY  		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 f   s5   4B8(BB8B	B8B	B88B<?B<)r   r	   r
   r	   r   r	   r   r	   )NN)r
   r	   r   r   r   r    r   r	   )
__future__r   r&   r$   typingr   $unstructured.partition.common.commonr   unstructured.utilsr   r   r/        r   <module>r6      s    " " " " " " 				        < < < < < < 4 4 4 4 4 4 
|$$! ! ! %$!J OS      r5   