
    NgD                        d dl mZmZmZmZ d dlZd dlmZ dZg dZ	de
de
fdZde
defd	Z	 	 dde
deeeee         f                  dee
e
f         fdZ	 	 	 dde
deeeee         f                  dee
         dee
e
f         fdZdS )    )IOOptionalTupleUnionN)convert_to_bytesg?)utf_8
iso_8859_1
iso_8859_6
iso_8859_8asciibig5utf_16	utf_16_be	utf_16_leutf_32	utf_32_be	utf_32_leeuc_jis_2004euc_jisx0213euc_jpeuc_krgb18030	shift_jisshift_jis_2004shift_jisx0213encodingreturnc                 z    |                                                      dd          }g d}||v r
|dd         }|S )zFormat input encoding string (e.g., `utf-8`, `iso-8859-1`, etc).
    Parameters
    ----------
    encoding
        The encoding string to be formatted (e.g., `UTF-8`, `utf_8`, `ISO-8859-1`, `iso_8859_1`,
        etc).
    _-)ziso-8859-6-iziso-8859-6-eziso-8859-8-iziso-8859-8-eN)lowerreplace)r   formatted_encodingannotated_encodingss      \/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/file_utils/encoding.pyformat_encoding_strr'   "   sT     "))11#s;; [ZZ000/4    c                 `    t           D ]%}t          |          t          |           k    r dS &dS )zChecks if an encoding string is valid. Helps to avoid errors in cases where
    invalid encodings are extracted from malformed documents.TF)COMMON_ENCODINGSr'   )r   common_encodings     r&   validate_encodingr,   4   sA     ,  //3Fx3P3PPP44 Q5r(    filenamefilec                    | r=t          | d          5 }|                                }d d d            n# 1 swxY w Y   n!|rt          |          }nt          d          t	          j        |          }|d         }|d         }||t          k     rt          D ]r}	 | r>t          | |          5 }|                                }d d d            n# 1 swxY w Y   n|                    |          }|} n7# t          t          f$ r Y ow xY wt          d|dt          |          d          n|                    |          }t          |          }	|	|fS )	Nrbz#No filename nor file were specifiedr   
confidencer   z]Unable to determine the encoding of the file or match it with any of the specified encodings.r   zInvalid encoding)openreadr   FileNotFoundErrorchardetdetectENCODE_REC_THRESHOLDr*   decodeUnicodeDecodeErrorUnicodeErrorlenr'   )
r.   r/   f	byte_dataresultr   r2   enc	file_textr$   s
             r&   detect_file_encodingrC   =   s	     G(D!! 	!QI	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!	 G$T**		 EFFF^I&&Fj!H%J:(<<<# 	 	C	 6h555 -$%FFHH	- - - - - - - - - - - - - - - !* 0 0 5 5I&5    %.I"  	  $$X..	,X66y((sB   488C5/CC5C	C5C	C55D	D	c                 d   | r|rkt          |          }t          | |          5 }	 |                                }n# t          t          f$ r}|d}~ww xY w	 ddd           n# 1 swxY w Y   nt          |           \  }}n|r|rt          |          }	 t          |t                    r|n|                                }t          |t                    r|                    |          }n|}n># t          t          f$ r}|d}~ww xY wt          |          \  }}nt          d          ||fS )z6Extracts document metadata from a plain text document.r3   N)r/   zNo filename was specified)
r'   r4   r5   r;   r<   rC   
isinstancebytesr:   r6   )r.   r/   r   r$   r>   rB   errorfile_contents           r&   read_txt_filerI   l   s     = 	K!4X!>!>h);<<<    !II*L9      K                                -A,J,J)			 = 	L!4X!>!>'1$'>'>OttDIIKKlE22 - , 3 34F G GII ,I&5    -Ad,K,K,K)		 ;<<<y((sJ   A$<A$AAAA$$A(+A(AC0 0DDD)r-   N)r-   NN)typingr   r   r   r   r7   $unstructured.partition.common.commonr   r9   r*   strr'   boolr,   rF   rC   rI    r(   r&   <module>rO      sh   - - - - - - - - - - - -  A A A A A A    0# #    $      .2,) ,),)
55	)*
+,) 38_,) ,) ,) ,)` .2" )  ) )
55	)*
+ ) sm ) 38_	 )  )  )  )  )  )r(   