
    Ng@                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dl"m#Z# d dl"m$Z$ d dl"m%Z% d dl&m'Z' dZ(	 d dl)m*Z* dZ+n# e,$ r dZ+Y nw xY w	 d dl-Z.dZ/n# e,$ r dZ/Y nw xY w e j0        d          Z1dZ2 ej3        d          Z4dZ5h dZ6d e7fd!e7fd"e7fd#e8fd$e9fd%e8fd&Z:d'd(d)d*d+Z; e%d,          Z< e%d-          Z= G d. d/          Z> G d0 d1e?          Z@ G d2 d3eA          ZB G d4 d5e?          ZC G d6 d7e?          ZD G d8 d9e?          ZEd: ZFed]d;            ZGd< ZHd= ZId> ZJd? ZKed@             ZLd^dAZM	 	 	 d_dCZNd`dDe9dEeOfdFZP	 	 	 	 dadGe e9         dHe!e9         dIe7dJe7dEeOf
dKZQ	 	 	 	 	 	 dbdLZRdM ZSdN ZTdO ZUeHdcdP            ZVeHdQ             ZWddBd e>jX        d fdRZY	 	 	 	 	 dddTZZ	 	 	 	 dedUZ[ddBd e>jX        d fdVZ\d]dWZ]ddBd e>jX        d dfdXZ^dYdBd e>jX        d fdZZ_d[ Z`ead\k    r eb e`                      dS )f    N)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)NamedTemporaryFile)sleep)List)Optional)InvalidVersion)parse)Version)Image	tesseract)ndarrayTFpytesseractzutf-8z	^[a-z_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez.tessedit_create_boxfile=1 batch.nochop makeboxztessedit_create_alto=1ztessedit_create_hocr=1ztessedit_create_tsv=1)boxxmlhocrtsvz3.05z4.1.0c                       e Zd ZdZdZdZdZdS )Outputbytesz
data.framedictstringN)__name__
__module____qualname__BYTES	DATAFRAMEDICTSTRING     S/var/www/html/ai-engine/env/lib/python3.11/site-packages/pytesseract/pytesseract.pyr3   r3   V   s"        EIDFFFr?   r3   c                        e Zd Z fdZ xZS )PandasNotSupportedc                 J    t                                          d           d S )NzMissing pandas packagesuper__init__self	__class__s    r@   rF   zPandasNotSupported.__init__^   s"    122222r?   r7   r8   r9   rF   __classcell__rI   s   @r@   rB   rB   ]   s8        3 3 3 3 3 3 3 3 3r?   rB   c                       e Zd Zd ZdS )TesseractErrorc                 4    || _         || _        ||f| _        d S N)statusmessageargs)rH   rQ   rR   s      r@   rF   zTesseractError.__init__c   s     W%			r?   N)r7   r8   r9   rF   r>   r?   r@   rN   rN   b   s#        & & & & &r?   rN   c                        e Zd Z fdZ xZS )TesseractNotFoundErrorc                 Z    t                                          t           d           d S )NzQ is not installed or it's not in your PATH. See README file for more information.)rE   rF   tesseract_cmdrG   s    r@   rF   zTesseractNotFoundError.__init__j   s;     6 6 6	
 	
 	
 	
 	
r?   rJ   rL   s   @r@   rU   rU   i   s8        
 
 
 
 
 
 
 
 
r?   rU   c                        e Zd Z fdZ xZS )TSVNotSupportedc                 J    t                                          d           d S )Nz4TSV output not supported. Tesseract >= 3.05 requiredrD   rG   s    r@   rF   zTSVNotSupported.__init__r   s,    B	
 	
 	
 	
 	
r?   rJ   rL   s   @r@   rY   rY   q   8        
 
 
 
 
 
 
 
 
r?   rY   c                        e Zd Z fdZ xZS )ALTONotSupportedc                 J    t                                          d           d S )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredrD   rG   s    r@   rF   zALTONotSupported.__init__y   s,    D	
 	
 	
 	
 	
r?   rJ   rL   s   @r@   r]   r]   x   r[   r?   r]   c                 &   |                                   	 |                     d           n*# t          $ r t          d           Y nt          $ r Y nw xY w|                                  || _        d S # |                                  || _        w xY w)N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcodes     r@   re   re      s    "Q   a    	! 	!!!!!s2   , A3 AA3 	AA3 AA3 3Bc              #     K   	 |sj|                                  d         V  	 | j                                         | j                                         | j                                         d S 	 |                      |          \  }}|V  n2# t
          j        $ r  t          | d           t          d          w xY w	 | j                                         | j                                         | j                                         d S # | j                                         | j                                         | j                                         w xY w)Nr`   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredre   RuntimeError)procseconds_error_strings       r@   timeout_managerrx      sm      	""$$Q'''' 	
	<"..w.??OA|( 	< 	< 	<rNNN:;;;	< 
 	
 	
s$   D 1B D /B>>D AEc                 N     t                      fd            _        S )Nc                  j    |                     dd          r	j        u r | i |_        j        S )NcachedF)pop_result)rS   kwargsfuncwrappers     r@   r   zrun_once.<locals>.wrapper   sC    zz(E** 	4go.H.H"dD3F33GOr?   )r   r}   )r   r   s   `@r@   run_oncer      s@    
4[[     [
 GONr?   c                     d                     d |                     t                                                    D                                                       S )N c              3      K   | ]}|V  d S rP   r>   .0lines     r@   	<genexpr>zget_errors.<locals>.<genexpr>   s3             r?   )joindecodeDEFAULT_ENCODING
splitlinesstrip)rw   s    r@   
get_errorsr      sX    88  %,,-=>>IIKK    eggr?   c                     t          | r|  dn|           D ]:}	 t          |           # t          $ r}|j        t          k    r Y d}~3d}~ww xY wdS )z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenamees      r@   cleanupr      s    YEY////IFF  	8 	 	 	w&   !    	 s   *
AA

Ac                    t           r)t          | t                    rt          j        |           } t          | t          j                  st          d          | j        sdn| j        }|t          vrt          d          d|                                 v rLt          j	        t          | j        d          }|                    | d|                     d                     |} || _        | |fS )NzUnsupported image objectr    zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrc   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgrounds      r@   preparer      s     ':eW55 '&&eU[)) 42333"\;u|I)))7888
ennYx_EE
(8(8(=(=>>>EL)r?   c           	   #   "  K   	 t          dd          5 }t          | t                    rT|j        t	          t          t          |                               fV  	 d d d            t          |j                   d S t          |           \  } }|j         dt           | }| 
                    || j                   |j        |fV  d d d            n# 1 swxY w Y   t          |j                   d S # t          |j                   w xY w)Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r
   saver   )r   fr   input_file_names       r@   r   r      s[     
wu=== 	*%%% fhx'@'@AAAAAA	* 	* 	* 	* 	* 	* 	  'u~~E9!"BBvByBBOJJu|J<<<&/))))	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	s<   C8 A	CC8 ?ACC8 CC8 CC8 8Dc                 L   t           j        t           j        d t          d}t          t           d          rMt          j                    |d<   |d         xj        t           j        z  c_        t           j        |d         _        | rt           j        |d<   nt           j	        |d<   |S )N)rm   rp   startupinfoenvSTARTUPINFOr   ro   )
rq   PIPEr	   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutr~   s     r@   subprocess_argsr      s    
 /	 F z=)) ? * 6 8 8}}%%)HH%%,6,>}) .%?x%-xMr?    c                    g }t           j        dk     }|r|dk    r|ddt          |          fz  }|t          | |fz  }||d|fz  }|r|t	          j        ||          z  }|                                D ]}	|	dvr|                    |	           t                              d|           	 t          j
        |fi t                      }
n1# t          $ r$}|j        t          k    r t                      d }~ww xY wt!          |
|          5 }|
j        r"t%          |
j        t'          |                    	 d d d            d S # 1 swxY w Y   d S )	Nwin32r   nicez-n-l)posix>   r.   osdr1   r/   z%r)sysplatformr   rW   shlexsplitappendLOGGERdebugrq   Popenr   r   r   r   rU   rx   rf   rN   r   )input_filenameoutput_filename_baser   langconfigr   rj   cmd_argsnot_windows
_extensionrt   r   rw   s                r@   run_tesseractr      s    H|w./K .tqyyVT3t99--0DEEHT4L  ;EKk::::oo'' ( (
999OOJ'''
LLx   +>>O,=,=>> + + +7f(***	+ 
w	'	' L<? 	L *\2J2JKKK	LL L L L L L L L L L L L L L L L L Ls*   .C 
C;C66C;*EE
E
r   return_bytesc                     t          | d          5 }|r |                                cd d d            S |                                                    t                    cd d d            S # 1 swxY w Y   d S )Nrb)openreadr   r   )r   r   output_files      r@   _read_outputr     s    	h		 ; 	&##%%; ; ; ; ; ; ; ; !!(()9::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   A,+A,,A03A0
extensionsr   r   rj   c           	      D  	 d                     d |D                                                       }|rd| }nd}t          |           5 \  }}||d                     |          ||||d	t          di 	 	fd|D             cd d d            S # 1 swxY w Y   d S )Nr   c              3   L   K   | ]}t                               |d           V   dS )r   N)EXTENTION_TO_CONFIGget)r   r   s     r@   r   z.run_and_get_multiple_output.<locals>.<genexpr>.  sD        3<	2..     r?   z-c r   r   r   r   r   r   r   rj   c                 ^    g | ])}t          d           t           | |dv rdn          *S )r      pdfr0   T)r   r
   )r   r   r~   r   s     r@   
<listcomp>z/run_and_get_multiple_output.<locals>.<listcomp>C  s_     
 
 

 	 01F6F9FF!_44, 
 
 
r?   r>   )r   r   r   r   )
r   r   r   r   rj   r   r   r   r   r~   s
        `   @r@   run_and_get_multiple_outputr   &  s?    XX  @J    egg   v	e 
3N,$-*--
 
 	
 
 
 
 

 (
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   ;BBBc           	          t          |           5 \  }}|||||||d}	t          di |	 t          |	d          t           | |          cd d d            S # 1 swxY w Y   d S )Nr   r   r>   )r   r   r   r
   )
r   r   r   r   r   rj   r   r   r   r~   s
             r@   run_and_get_outputr   L  s     
e 
3N,$-"
 
 	,-BvByBB
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   :AAAc                    i }fd|                                                      d          D             }t          |          dk     r|S |                    d          }t          |          }t          |d                   |k     r|d                             d           |dk     r||z  }t          |          D ]\  }}t                      ||<   |D ]{}	t          |	          |k    r||k    r<	 t          t          |	|                             }
n # t          $ r |	|         }
Y nw xY w|	|         }
||                             |
           ||S )Nc                 :    g | ]}|                               S r>   r   )r   rowcell_delimiters     r@   r   z file_to_dict.<locals>.<listcomp>i  s%    III#CIIn%%IIIr?   
   r   rk   r   )
r   r   lenr|   r   	enumeratelistintfloat
ValueError)r1   r   str_col_idxresultrowsheaderlengthiheadr   vals    `         r@   file_to_dictr   g  sq   FIIII1B1B41H1HIIID
4yy1}}XXa[[F[[F
48}}v 	RQvV$$ % %4vvt 	% 	%C3xx1}}K!eCFmm,,CC! ! ! !a&CCC! !f4L$$$$	% Ms   3"DD+*D+c                     |t           u r|                                 S |t          u r#	 t          |            dS # t          $ r Y dS w xY wdS )NTF)r   isdigitr   r   )r   _types     r@   is_validr    s`    ||{{}}~~	#JJJ4 	 	 	55	 4s   9 
AAc                 T    d d |                      d          D             D             S )Nc                    i | ]}t          |          d k    t          |d         t          |d                  d                   Bt          |d                  d         t          |d                  d         |d                   S )r   r`   r   )r   r  OSD_KEYS)r   kvs     r@   
<dictcomp>zosd_to_dict.<locals>.<dictcomp>  sv       r77a<<HRUHRUOA,>??< 	AHRUOA.r!u55<<r?   c              3   @   K   | ]}|                     d           V  dS ): Nr   r   s     r@   r   zosd_to_dict.<locals>.<genexpr>  s.      @@4::d##@@@@@@r?   r   r   )r   s    r@   osd_to_dictr    s;     @@		$@@@   r?   c                    t           dg}| r|t          j        |           z  }	 t          j        |t          j        t          j                  }n# t          $ r t                      w xY w|j	        dvrt                      g }|j
        r}|j
                            t                                        t                    D ]E}|                                }t                              |          r|                    |           F|S )Nz--list-langs)ro   rp   )r   r`   )rW   r   r   rq   runr   STDOUTr   rU   rf   ro   r   r   r   r   LANG_PATTERNmatchr   )r   r   r   	languagesr   r   s         r@   get_languagesr    s   ~.H (EK''''?$
 
 

  ' ' '$&&&' &&$&&&I} 'M(()9::@@II 	' 	'D::<<D!!$'' '  &&&s   +A A)c                     	 t          j        t          dgt           j        t          t           j                  } n# t          $ r t                      w xY w|                     t                    }|
                    t          j        dd                                       d          ^}}|                    d          ^}}	 t          |          }|t          k    sJ n(# t           t"          f$ r t%          d| d          w xY w|S )	z9
    Returns Version object of the Tesseract version
    z	--version)rp   r   rm   
   Nr   -zInvalid tesseract version: "")rq   check_outputrW   r  r	   r   r   rU   r   r   lstripr6   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionrv   versions        r@   get_tesseract_versionr#    s)   
'(K($$	
 
 
  ' ' '$&&&' -- 011K!(()9"##)>??II#NNOK!!++C00OK!H$$//////N+ H H HFFFFGGGH Ns   8; AC" "%Dc                     | d||||g	 t          j        fdt           j        fdt           j        fdi|                     S )zS
    Returns the result of a Tesseract OCR run on the provided image to string
    txtc                      t           dgz    S NTr   rS   s   r@   <lambda>z!image_to_string.<locals>.<lambda>      044&=B r?   c                      dt            iS )Ntextr(  r)  s   r@   r*  z!image_to_string.<locals>.<lambda>  s    f&8$&?@ r?   c                      t            S rP   r(  r)  s   r@   r*  z!image_to_string.<locals>.<lambda>      148 r?   )r3   r:   r<   r=   r   r   r   r   output_typerj   rS   s         @r@   image_to_stringr2    sm     5$g6DBBBB@@@@8888 	   r?   r   c                     |dvrt          d|           |dk    rd|                                 }| |||||dg}t          | S )zU
    Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
    r   zUnsupported extension: r0   z-c tessedit_create_hocr=1 T)r   r   r   )r   r   r   r   r   rj   rS   s          r@   image_to_pdf_or_hocrr4    se     ''>9>>???F>fllnn>>9dFD'4@Dt$$r?   c                     t          d          t          k     rt                      d|                                 }| d||||dg}t	          | S )zU
    Returns the result of a Tesseract OCR run on the provided image to ALTO XML
    Tr{   z-c tessedit_create_alto=1 r/   )r#  TESSERACT_ALTO_VERSIONr]   r   r   )r   r   r   r   rj   rS   s         r@   image_to_alto_xmlr8    s[     D))),BBB   :&,,..::F5$gt<Dt$$r?   c                     |                                  d}| d||||g	 t          j        fdt          j        fdt          j        fdi|                     S )zR
    Returns string containing recognized characters and their box boundaries
    z2 -c tessedit_create_boxfile=1 batch.nochop makeboxr.   c                      t           dgz    S r'  r(  r)  s   r@   r*  z image_to_boxes.<locals>.<lambda>)  r+  r?   c                  :    t          dt             dd          S )Nz char left bottom right top page
r   r   r   r   r)  s   r@   r*  z image_to_boxes.<locals>.<lambda>*  s(    \K0BD0IKK
 
 r?   c                      t            S rP   r(  r)  s   r@   r*  z image_to_boxes.<locals>.<lambda>/  r/  r?   r   r3   r:   r<   r=   r0  s         @r@   image_to_boxesr?    s     <<>>MMM  5$g6DBBBB 
 
 
 

 	8888    r?   c                     t           st                      t          dd}	 |                    |           n# t          t
          f$ r Y nw xY wt          j        t          t          |            fi |S )N	)quotingsep)
pandas_installedrB   r   updaterc   r   pdread_csvr   r   )rS   r   r~   s      r@   get_pandas_outputrH  3  s     # """#D11Ffz"    ;w14899DDVDDDs   7 A
Ac           
      ,   t          d          t          k     rt                      d|                                 }| d||||g	 t	          j        fdt          j        fdt          j        fdt          j        fdi|                     S )	zt
    Returns string containing box boundaries, confidences,
    and other information. Requires Tesseract 3.05+
    Tr6  z-c tessedit_create_tsv=1 r1   c                      t           dgz    S r'  r(  r)  s   r@   r*  zimage_to_data.<locals>.<lambda>U  r+  r?   c                  ,    t           dgz             S r'  )rH  )rS   pandas_configs   r@   r*  zimage_to_data.<locals>.<lambda>V  s    "3D6M#
 #
 r?   c                  4    t          t            dd          S )NrA  rk   r<  r)  s   r@   r*  zimage_to_data.<locals>.<lambda>Z  s    \*<d*CT2NN r?   c                      t            S rP   r(  r)  s   r@   r*  zimage_to_data.<locals>.<lambda>[  r/  r?   )	r#  r  rY   r   r3   r:   r;   r<   r=   )r   r   r   r   r1  rj   rL  rS   s         `@r@   image_to_datarO  @  s     D))),AAA999F5$g6DBBBB 
 
 
 
 
 	NNNN8888    r?   r   c                     d|                                  }| d||||g	 t          j        fdt          j        fdt          j        fdi|                     S )zN
    Returns string containing the orientation and script detection (OSD)
    z--psm 0 r   c                      t           dgz    S r'  r(  r)  s   r@   r*  zimage_to_osd.<locals>.<lambda>n  r+  r?   c                  0    t          t                      S rP   )r  r   r)  s   r@   r*  zimage_to_osd.<locals>.<lambda>o  s    [);T)BCC r?   c                      t            S rP   r(  r)  s   r@   r*  zimage_to_osd.<locals>.<lambda>p  r/  r?   r>  r0  s         @r@   image_to_osdrT  _  s     )((F5$g6DBBBBCCCC8888 	   r?   c                     t          t          j                  dk    rt          j        d         d }} nut          t          j                  dk    r;t          j        d         dk    r%t          j        d         t          j        d         }} nt          dt          j                   dS 	 t          j        |           5 }t          t          ||                     d d d            d S # 1 swxY w Y   d S # t          $ r6}t          t          |           d	t          j                   Y d }~dS d }~wt          $ r=}t          t          |          j         d
| t          j                   Y d }~dS d }~ww xY w)Nr   r`      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r  )r   r   argvprintrp   r   r   r2  rU   r   r   typer7   )r   r   imgr   s       r@   mainr]  t  s   
38}}!d$	SX!		t 3 3!chqk$9
KKKKqZ!! 	3S/#D111222	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3!   Qmmm#*----qqqqq   a!((Q((sz::::qqqqqsH   )C6 =C)C6 )C--C6 0C-1C6 6
E; +D11E;>2E66E;__main__rP   )T)r   r   r   )F)Nr   r   F)r   Nr   r   r   F)r   )Nr   r   r   r   )Nr   r   r   )cloggingrer   r6   rq   r   
contextlibr   csvr   r   r   	functoolsr   globr   ior   osr	   r
   r   r   os.pathr   r   r   tempfiler   timer   typingr   r   packaging.versionr   r   r   PILr   rW   numpyr   r   ModuleNotFoundErrorpandasrF  rD  	getLoggerr   r   compiler  r   r   r   r   r   r  r   r  r7  r3   EnvironmentErrorrB   rs   rN   rU   rY   r]   re   rx   r   r   r   r   r   r   r   boolr   r   r   r   r  r  r  r#  r=   r2  r4  r8  r?  rH  rO  rT  r]  r7   r  r>   r?   r@   <module>rt     s    				       



 % % % % % %                                                                         ' ' ' ' ' '                   , , , , , , # # # # # # % % % % % %       OO   OOO    
	=	)	) rz+&&    $,c2o159o'/  <#$"	      ))        3 3 3 3 3) 3 3 3
& & & & &\ & & &
 
 
 
 
- 
 
 

 
 
 
 
& 
 
 

 
 
 
 
' 
 
 

" 
" 
"    $        *      : 	
&L &L &L &LR; ;3 ;d ; ; ; ; #
 #
S	#
 3-#
 	#

 #
 #
 #
 #
 #
P 		

 
 
 
6     F     
   
8 
  
: 
	
   , 
	
% % % %2 
	
% % % %, 
	
   6
E 
E 
E 
E 
	
   B 
	
   *  ( z
*TTVV

 s$   B# #B-,B-1B8 8CC