
    Ng>                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddl m!Z! ddl m"Z"  ej#        e$          Z% G d de&          Z' G d d          Z( G d de(          Z) G d de(          Z* G d de*          Z+ G d de(          Z, G d de,          Z- G d de)          Z. G d d e,          Z/ G d! d"e)          Z0 G d# d$e,          Z1 G d% d&          Z2 G d' d(ee                   Z3d)ee4         d*dfd+Z5e$d,k    r e5ej6                   dS dS )-a   Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on the Adobe website:

  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalTextIOTupleUnioncastSet   )name2unicode)KWD)PSEOF)	PSKeyword)	PSLiteral)PSStackParser)PSSyntaxError)literal_name)choplist)nunpackc                       e Zd ZdS )	CMapErrorN__name__
__module____qualname__     K/var/www/html/ai-engine/env/lib/python3.11/site-packages/pdfminer/cmapdb.pyr   r   1   s        Dr"   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eeee
f         ddfdZddZd
edee
         fdZdS )CMapBaser   kwargsreturnNc                 8    |                                 | _        d S N)copyattrsselfr&   s     r#   __init__zCMapBase.__init__9   s    28++--


r"   c                 @    | j                             dd          dk    S )NWModer   r+   getr-   s    r#   is_verticalzCMapBase.is_vertical<   s    z~~gq))Q..r"   kvc                     || j         |<   d S r)   )r+   )r-   r5   r6   s      r#   set_attrzCMapBase.set_attr?   s    
1r"   codecidc                     d S r)   r!   )r-   r9   r:   s      r#   add_code2cidzCMapBase.add_code2cidB       r"   c                     d S r)   r!   )r-   r:   r9   s      r#   add_cid2unichrzCMapBase.add_cid2unichrE   r=   r"   cmapc                     d S r)   r!   )r-   r@   s     r#   use_cmapzCMapBase.use_cmapH   r=   r"   c                     t           r)   )NotImplementedError)r-   r9   s     r#   decodezCMapBase.decodeK   s    !!r"   )r@   r%   r'   N)r   r   r    debugobjectr.   boolr4   strr8   intr<   r   r   bytesr?   rB   r   rE   r!   r"   r#   r%   r%   5   s       E@ @D @ @ @ @/T / / / /# & T     3 4    # U9eS3H-I d       "5 "Xc] " " " " " "r"   r%   c            	           e Zd Zdeeef         ddfdZdefdZdeddfdZ	de
dee         fd	Zej        dd
fdedeeeef                  deedf         ddfdZdS )CMapr&   r'   Nc                 8    t          j        | fi | i | _        d S r)   )r%   r.   code2cidr,   s     r#   r.   zCMap.__init__P   s&    $))&)))+-r"   c                 <    d| j                             d          z  S )Nz
<CMap: %s>CMapNamer1   r3   s    r#   __repr__zCMap.__repr__T   s    djnnZ8888r"   r@   c                    t          |t                    s$J t          t          |                                dt          t
          t          f         dt          t
          t          f         dd ffd | j        |j                   d S )Ndstsrcr'   c                     |                                 D ]3\  }}t          |t                    ri }|| |<    ||           .|| |<   4d S r)   )items
isinstancedict)rT   rU   r5   r6   dr*   s        r#   r*   zCMap.use_cmap.<locals>.copyZ   sd    ))++  Aa&& +-ACFDAJJJJCFF r"   )rX   rM   rI   typer   rJ   rG   rO   )r-   r@   r*   s     @r#   rB   zCMap.use_cmapW   s    $%%66s4::666	d3;' 	d3;.? 	D 	 	 	 	 	 	 	T]DM*****r"   r9   c              #   ,  K   t                               d| |           | j        }t          |          D ]^}||v rQ||         }t	          |t
                    r|V  | j        }/t          t          t
          t          f         |          }W| j        }_d S )Nzdecode: %r, %r)	logrF   rO   iterrX   rJ   r   r   rG   )r-   r9   rZ   ixs        r#   rE   zCMap.decodee   s      		"D$///Md 		" 		"AAvvaDa%% 3GGGAAT#v+.22AAM		" 		"r"   r!   outrO   .c           	      P   |	| j         }d}t          |                                          D ]x\  }}||fz   }t          |t                    r|                    d||fz             ;|                     |t          t          t          t          f         |          |           yd S )Nr!   zcode %r = cid %d
)ra   rO   r9   )
rO   sortedrW   rX   rJ   writedumpr   r   rG   )r-   ra   rO   r9   r5   r6   cs          r#   re   z	CMap.dumps   s     }HDX^^--.. 	P 	PFQtA!S!! P		.!Q78888		cDc6k1BA,F,FQ	OOOO	P 	Pr"   )r   r   r    r   rI   rJ   r.   rR   r%   rB   rK   r   rE   sysstdoutr   r
   r   rG   r   re   r!   r"   r#   rM   rM   O   s       .sCx .T . . . .9# 9 9 9 9+X +$ + + + +"5 "Xc] " " " "  j04 "	P PP 4V,-P CHo	P
 
P P P P P Pr"   rM   c                   .    e Zd Zdedeedf         fdZdS )IdentityCMapr9   r'   .c                 ^    t          |          dz  }|rt          j        d|z  |          S dS )N   z>%dHr!   lenstructunpackr-   r9   ns      r#   rE   zIdentityCMap.decode   s4    IIN 	=!T2222r"   Nr   r   r    rK   r   rJ   rE   r!   r"   r#   rj   rj      >        5 U38_      r"   rj   c                   .    e Zd Zdedeedf         fdZdS )IdentityCMapByter9   r'   .c                 X    t          |          }|rt          j        d|z  |          S dS )Nz>%dBr!   rm   rq   s      r#   rE   zIdentityCMapByte.decode   s0    II 	=!T2222r"   Nrs   r!   r"   r#   rv   rv      rt   r"   rv   c                   h    e Zd Zdeeef         ddfdZdefdZdedefdZe	j
        fdeddfd	ZdS )

UnicodeMapr&   r'   Nc                 8    t          j        | fi | i | _        d S r)   )r%   r.   
cid2unichrr,   s     r#   r.   zUnicodeMap.__init__   s&    $))&)))*,r"   c                 <    d| j                             d          z  S )Nz<UnicodeMap: %s>rQ   r1   r3   s    r#   rR   zUnicodeMap.__repr__   s    !DJNN:$>$>>>r"   r:   c                 T    t                               d| |           | j        |         S )Nget_unichr: %r, %r)r]   rF   r{   r-   r:   s     r#   
get_unichrzUnicodeMap.get_unichr   s&    		&c222s##r"   ra   c                     t          | j                                                  D ]\  }}|                    d||fz              d S )Nzcid %d = unicode %r
)rc   r{   rW   rd   )r-   ra   r5   r6   s       r#   re   zUnicodeMap.dump   sT    T_224455 	8 	8FQII-A67777	8 	8r"   )r   r   r    r   rI   rJ   r.   rR   r   rg   rh   r   re   r!   r"   r#   ry   ry      s        -sCx -T - - - -?# ? ? ? ?$c $c $ $ $ $ "% 8 8 8 8 8 8 8 8 8r"   ry   c                       e Zd ZdedefdZdS )IdentityUnicodeMapr:   r'   c                 X    t                               d| |           t          |          S )z+Interpret character id as unicode codepointr~   )r]   rF   chrr   s     r#   r   zIdentityUnicodeMap.get_unichr   s$    		&c2223xxr"   N)r   r   r    rJ   rI   r   r!   r"   r#   r   r      s6        c c      r"   r   c                   "    e Zd ZdededdfdZdS )FileCMapr9   r:   r'   Nc                    t          |t                    rt          |t                    s3J t          t          |          t          |          f                      | j        }|d d         D ]L}t          |          }||v r.t          t          t          t          f         ||                   }Ci }|||<   |}Mt          |d                   }|||<   d S )N)	rX   rI   rJ   r[   rO   ordr   r   rG   )r-   r9   r:   rZ   rf   cits          r#   r<   zFileCMap.add_code2cid   s    $$$ 	
C)=)= 	
 	
s$ZZc#@
 @
 	
 	
 	
 Mcrc 	 	AQBQwwc6k*AbE22')"b]]"r"   )r   r   r    rI   rJ   r<   r!   r"   r#   r   r      s=         3 4      r"   r   c                   4    e Zd Zdedeeeef         ddfdZdS )FileUnicodeMapr:   r9   r'   Nc                 $   t          |t                    s$J t          t          |                                t          |t                    r1t          |j        t                    sJ t          |j                  }n`t          |t                    r|                    dd          }n4t          |t                    rt          |          }nt          |          |dk    r | j                            |          dk    rd S || j        |<   d S )NzUTF-16BEignore     )rX   rJ   rI   r[   r   namer   rK   rE   r   	TypeErrorr{   r2   )r-   r:   r9   unichrs       r#   r?   zFileUnicodeMap.add_cid2unichr   s    #s##33Sc^^333dI&& 
	"di-----!$),,FFe$$ 	"[[X66FFc"" 	"YYFFD//! X$/"5"5c":":c"A"AF%r"   )r   r   r    rJ   r   r   rK   r?   r!   r"   r#   r   r      sI        &# &U9eS3H-I &d & & & & & &r"   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler'   Nc                     t                                          |           |j        | _        |j        rd| j        d<   d S d S N)rQ   r   r0   )superr.   CODE2CIDrO   IS_VERTICALr+   )r-   r   r   	__class__s      r#   r.   zPyCMap.__init__   sN    $''' 	$"#DJw	$ 	$r"   )r   r   r    rI   r   r.   __classcell__r   s   @r#   r   r      sR        $S $# $$ $ $ $ $ $ $ $ $ $ $r"   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr'   Nc                     t                                          |           |r|j        | _        d| j        d<   d S |j        | _        d S r   )r   r.   CID2UNICHR_Vr{   r+   CID2UNICHR_H)r-   r   r   r   r   s       r#   r.   zPyUnicodeMap.__init__   sP    $''' 	2$1DO"#DJw$1DOOOr"   )r   r   r    rI   r   rH   r.   r   r   s   @r#   r   r      sY        2S 2# 2 2$ 2 2 2 2 2 2 2 2 2 2r"   r   c                       e Zd ZU i Zeeef         ed<   i Zeee	e
         f         ed<    G d de          Zededefd            Zededefd            Zedded
edefd            ZdS )CMapDB_cmap_cache_umap_cachec                       e Zd ZdS )CMapDB.CMapNotFoundNr   r!   r"   r#   CMapNotFoundr      s        r"   r   r   r'   c           	         |                     dd          }d|z  }t                              d|           t          j                            dd          t          j                            t          j                            t                    d          f}|D ]}t          j                            ||          }t          j        
                    |          rt          j        |          }	 t          t          |          dt          j        |                                                    |                                 c S # |                                 w xY wt&                              |          )	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/r@   r!   )replacer]   rF   osenvironr2   pathjoindirname__file__existsgzipopenr[   rI   pickleloadsreadcloser   r   )clsr   filename
cmap_paths	directoryr   gzfiles          r#   
_load_datazCMapDB._load_data   s$   ||D"%%!D(		-&&&JNN;(>??GLL22F;;

 $ 		, 		,I7<<	844Dw~~d## #4#D		2v|FKKMM/J/JKKLLNNNNNNFLLNNNN# %%d+++s   1AE		Ec                 P   |dk    rt          d          S |dk    rt          d          S |dk    rt          d          S |dk    rt          d          S 	 | j        |         S # t          $ r Y nw xY w|                     |          }t          ||          x| j        |<   }|S )Nz
Identity-Hr   )r0   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rj   rv   r   KeyErrorr   r   )r   r   datar@   s       r#   get_cmapzCMapDB.get_cmap  s    <a((((\!!a(((('''#!,,,,'''#!,,,,	?4(( 	 	 	D	~~d##'-dD'9'99s   A' '
A43A4Fr   c                     	 | j                  |         S # t          $ r Y nw xY w|                     dz            fddD             | j         <   | j                  |         S )Nzto-unicode-%sc                 2    g | ]}t          |          S r!   )r   ).0r6   r   r   s     r#   
<listcomp>z*CMapDB.get_unicode_map.<locals>.<listcomp>  s%     T T TdD!!<!< T T Tr"   )FT)r   r   r   )r   r   r   r   s    ` @r#   get_unicode_mapzCMapDB.get_unicode_map  s    	?4(22 	 	 	D	~~o455 T T T T Tm T T Tt$X..s    
$$N)F)r   r   r    r   r   rI   r   __annotations__r   r   r   r   r   classmethodr   r   r%   r   rH   ry   r   r!   r"   r#   r   r      s        %'Kc6k"'''13Kc4--.333    y    ,c ,c , , , [,& C H    [" / /3 /$ /: / / / [/ / /r"   r   c                      e Zd ZdededdfdZddZ ed          Z ed          Z	 ed	          Z
 ed
          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          ZdededdfdZdeddfdZdS )
CMapParserr@   fpr'   Nc                 r    t          j        | |           || _        d| _        t	                      | _        d S )NT)r   r.   r@   _in_cmapset	_warnings)r-   r@   r   s      r#   r.   zCMapParser.__init__!  s3    tR(((	#&55r"   c                 R    	 |                                   n# t          $ r Y nw xY wd S r)   )
nextobjectr   r3   s    r#   runzCMapParser.run)  s?    	OO 	 	 	D	s    
$$s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 Z   || j         u rd| _        |                                  dS || j        u r	d| _        dS | j        sdS || j        u rZ	 |                     d          \  \  }}\  }}| j                            t          |          |           n# t          $ r Y nw xY wdS || j
        u r}	 |                     d          \  \  }}| j                            t                              t          |                               n # t          $ r Y nt          j        $ r Y nw xY wdS || j        u r|                                  dS || j        u r|                                  dS || j        u r|                                  dS || j        u rd |                                 D             }t'          d|          D ]\  }}	}
t)          |t*                    s|                     d           2t)          |	t*                    s|                     d	           ]t)          |
t.                    s|                     d
           t1          |          t1          |	          k    r|                     d           |dd         }|	dd         }||k    r|                     d           |dd         }|	dd         }t3          |          }t3          |          }t1          |          }t5          ||z
  dz             D ]D}|t7          j        d||z             | d         z   }| j                            |
|z   |           EdS || j        u r|                                  dS || j        u r{d |                                 D             }t'          d|          D ]J\  }
}t)          |t*                    r0t)          |
t.                    r| j                            |
|           KdS || j         u r|                                  dS || j!        u rd |                                 D             }t'          d|          D ]\  }}	}t)          |t*                    s|                     d           2t)          |	t*                    s|                     d           ]t1          |          t1          |	          k    r|                     d           t3          |          }t3          |	          }t)          |tD                    rrt1          |          ||z
  dz   k    r|                     d           tG          t5          ||dz             |          D ] \  }
}| j                            |
|           !8t)          |t*                    sJ |dd         }t3          |          }|dd         }t1          |          }t5          ||z
  dz             D ]D}|t7          j        d||z             | d         z   }| j                            ||z   |           EdS || j$        u r|                                  dS || j%        u rd |                                 D             }t'          d|          D ]W\  }
}t)          |
t*                    r=t)          |t*                    r(| j                            t3          |
          |           XdS || j&        u r|                                  dS || j'        u r|                                  dS | (                    ||f           dS )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrl   r   c                     g | ]\  }}|S r!   r!   r   __objs      r#   r   z)CMapParser.do_keyword.<locals>.<listcomp>p      777IRC777r"      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>Lc                     g | ]\  }}|S r!   r!   r   s      r#   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r"   c                     g | ]\  }}|S r!   r!   r   s      r#   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r"   zThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.c                     g | ]\  }}|S r!   r!   r   s      r#   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r"   ))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr@   r8   r   r   KEYWORD_USECMAPrB   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rX   rK   
_warn_oncerJ   rn   r   rangero   packr?   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r-   r   r   _r5   r6   cmapnameobjs
start_byteend_byter:   start_prefix
end_prefixsvarevarstartendvlenr_   r`   r9   unicode_valuevarbaseprefixs                            r#   
do_keywordzCMapParser.do_keywordA  s   
 D*** DMKKMMMFd***!DMF} 	FD$$$#'88A;; !Q!Q	""<??A6666    FD(((#'88A;; !X	""6??<3I3I#J#JKKKK    &   FD444KKMMMFD222KKMMMFD...KKMMMFD,,,77777D/74/@/@ 9 9+Xs!*e44 OO$VWWW!(E22 OO$TUUU!#s++ OO$TUUUz??c(mm33OO-   )#2#%crc]
:--OO:   !"##}dmm4yysU{Q// 9 9A$v{4'C'CTEFF'KKAI,,S1Wa88889 FD---KKMMMFD+++77777D'400 8 8ddE** 8z#s/C/C 8I,,S$777FD---KKMMMFD+++77777D08D0A0A ? ?,Xt!*e44 OO$EFFF!(E22 OO$CDDDz??c(mm33OO$TUUU
++h''dD)) ?4yyC%K!O33F   /2%sQw2G2G.N.N E E*]	00mDDDDE &dE22222rss)C"3<<D!#2#YFs88D"3;?33 ? ?"V[tax%@%@$%HH	00A>>>>? FD,,,KKMMMFD***77777D'400 A Adc5)) Aju.E.E AI,,WS\\4@@@FD111KKMMMFD///KKMMMF		3,s,   AB 
B B /AD	 	
D&D&%D&msgc                     || j         vr;| j                             |           d}t                              ||z              dS dS )z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr]   warning)r-   r  base_msgs      r#   r   zCMapParser._warn_once  sU    dn$$Ns###/ 
 KK3''''' %$r"   )r'   N)r   r   r    r%   r   r.   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r   r  rI   r   r!   r"   r#   r   r      s       X 8         L))c*ooOc*ooO#f++K"%#&<"="= #$8 9 9C 011#n--3//]++3//]++#n--L))"s#677 S!233U c U ) U  U  U  U  U n	(c 	(d 	( 	( 	( 	( 	( 	(r"   r   argvr'   c                 "   ddl m}  |dt                     | dd          }|D ]j}t          |d          }t	                      }t          ||                                           |                                 |                                 kd S )Nr   )warnzThe function main() from cmapdb.py will be removed in 2023. It was probably introduced for testing purposes a long time ago, and no longer relevant. Feel free to create a GitHub issue if you disagree.r   rb)	warningsr  DeprecationWarningr   r   r   r   r   re   )r  r  argsfnamer   r@   s         r#   mainr     s    D	> 		   8D  %4  """



		
Fr"   __main__)7__doc__r   loggingr   os.pathr   ro   rg   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   
encodingdbr   psparserr   r   r   r   r   r   r   utilsr   r   	getLoggerr   r]   	Exceptionr   r%   rM   rj   rv   ry   r   r   r   r   r   r   r   rI   r   r  r!   r"   r#   <module>r+     s5  	 	   				       



                               % $ $ $ $ $                         # # # # # # # # # # # # " " " " " "            g!!	 	 	 	 		 	 	 	" " " " " " " "42P 2P 2P 2P 2P8 2P 2P 2Pj    8       |   8 8 8 8 8 8 8 8"           t   $& & & & &Z & & &*$ $ $ $ $T $ $ $2 2 2 2 2: 2 2 26/ 6/ 6/ 6/ 6/ 6/ 6/ 6/rA( A( A( A( A(y) A( A( A(HtCy T    ( zDNNNNN r"   