
    NgX/              	       <   U d Z ddlmZmZmZmZmZmZ ddlm	Z	 g dZ
dZi Zeeef         ed<    G d d          Z G d	 d
e          Z G d de          Z G d de          Zdededee         fdZdee         dededee         fdZdeeef         fdZdS )z]
emoji.tokenizer
~~~~~~~~~~~~~~~

Components for detecting and tokenizing emoji in strings.

    )List
NamedTupleDictUnionIteratorAny)unicode_codes)
EmojiMatchEmojiMatchZWJEmojiMatchZWJNonRGITokentokenizefilter_tokensu   ‍_SEARCH_TREEc                       e Zd ZdZdZdedededeeee	f         df         fdZ
d	eee	f         fd
Zd	efdZd	ed         fdZd	efdZdS )r
   zd
    Represents a match of a "recommended for general interchange" (RGI)
    emoji in a string.
    emojistartenddatar   r   r   r   Nc                 D    || _         	 || _        	 || _        	 || _        d S Nr   )selfr   r   r   r   s        K/var/www/html/ai-engine/env/lib/python3.11/site-packages/emoji/tokenizer.py__init__zEmojiMatch.__init__"   s/     
!
86	bb    returnc                     | j         r/| j                                         }| j        |d<   | j        |d<   |S | j        | j        dS )z
        Returns a copy of the data from :data:`EMOJI_DATA` for this match
        with the additional keys ``match_start`` and ``match_end``.
        match_start	match_end)r   r    )r   copyr   r   )r   emj_datas     r   	data_copyzEmojiMatch.data_copy1   sO    
 9 	Fy~~''H&*jH]#$(HH[!O#':DHEEEr   c                     t           | j        v S )zp
        Checks if this is a ZWJ-emoji.

        :returns: True if this is a ZWJ-emoji, False otherwise
        )_ZWJr   r   s    r   is_zwjzEmojiMatch.is_zwj>   s     tz!!r   )r   r
   c                 L    |                                  rt          |           S | S )z
        Splits a ZWJ-emoji into its constituents.

        :returns: An :class:`EmojiMatchZWJ` containing the "sub-emoji" if this is a ZWJ-emoji, otherwise self
        )r'   r   r&   s    r   splitzEmojiMatch.splitG   s'     ;;== 	 &&&Kr   c                 P    | j         j         d| j         d| j         d| j         dS N(z, :))	__class____name__r   r   r   r&   s    r   __repr__zEmojiMatch.__repr__S   s4    .)RRDJRR$*RRtxRRRRr   )r0   
__module____qualname____doc__	__slots__strintr   r   r   r   r#   boolr'   r)   r1    r   r   r
   r
      s         
 2Icc!$c+.c6;DcND<P6Qc c c cF4S> F F F F" " " " "
u:; 
 
 
 
S# S S S S S Sr   r
   c                   X     e Zd ZdZdZdef fdZdefdZde	fdZ
d
dZdefd	Z xZS )r   zr
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\u200D``).)emojismatchc           
         t                                          |j        |j        |j        |j                   g | _        	 |j        }|j                            t                    D ]q}t          |||t          |          z   t          j                            |d                     }| j                            |           |t          |          dz   z  }rd S )N   )superr   r   r   r   r   r;   r)   r%   r
   lenr	   
EMOJI_DATAgetappend)r   r<   iemr/   s        r   r   zEmojiMatchZWJ.__init__^   s    ek59ejIII(*5K""4(( 	 	A1aSVV]-E-I-I!T-R-RSSAKq!!!Q!OAA	 	r   r   c                 T    t                               d | j        D                       S )z1
        Joins a ZWJ-emoji into a string
        c              3   $   K   | ]}|j         V  d S r   r   .0rE   s     r   	<genexpr>z%EmojiMatchZWJ.join.<locals>.<genexpr>o   s$      66Q666666r   )r%   joinr;   r&   s    r   rM   zEmojiMatchZWJ.joinj   s'    
 yy66$+666666r   c                     dS )NTr9   r&   s    r   r'   zEmojiMatchZWJ.is_zwjq   s    tr   c                     | S r   r9   r&   s    r   r)   zEmojiMatchZWJ.splitt   s    r   c                 j    | j         j         d|                                  d| j         d| j         dS r+   )r/   r0   rM   r   r   r&   s    r   r1   zEmojiMatchZWJ.__repr__w   s:    .)SSDIIKKSS4:SSSSSSr   )r   r   )r0   r2   r3   r4   r5   r
   r   r6   rM   r8   r'   r)   r1   __classcell__)r/   s   @r   r   r   W   s        - - I
j 
 
 
 
 
 
7c 7 7 7 7       T# T T T T T T T Tr   r   c                   4    e Zd ZdZdedefdZd ZdefdZdS )	r   a  
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\u200D``). This class is only used for emoji
    that are not "recommended for general interchange" (non-RGI) by Unicode.org.
    The data property of this class is always None.
    first_emoji_matchsecond_emoji_matchc                 B    ||g| _         	 |                                  d S r   )r;   _update)r   rS   rT   s      r   r   zEmojiMatchZWJNonRGI.__init__   s#    (*<=5r   c                     t                               d | j        D                       | _        | j        d         j        | _        | j        d         j        | _        d | _        d S )Nc              3   $   K   | ]}|j         V  d S r   rI   rJ   s     r   rL   z.EmojiMatchZWJNonRGI._update.<locals>.<genexpr>   s$      <<1qw<<<<<<r   r   )r%   rM   r;   r   r   r   r   r&   s    r   rV   zEmojiMatchZWJNonRGI._update   sQ    YY<<<<<<<
[^)
;r?&			r   next_emoji_matchc                 b    | j                             |           |                                  d S r   )r;   rC   rV   )r   rZ   s     r   _addzEmojiMatchZWJNonRGI._add   s*    +,,,r   N)r0   r2   r3   r4   r
   r   rV   r\   r9   r   r   r   r   {   sj         * *      Z      r   r   c                   8    e Zd ZU dZeed<   eeef         ed<   dS )r   z
    A named tuple containing the matched string and its :class:`EmojiMatch` object if it is an emoji
    or a single character that is not a unicode emoji.
    charsvalueN)r0   r2   r3   r4   r6   __annotations__r   r
   r9   r   r   r   r      s<          
 JJJj!!!!!!r   r   stringkeep_zwjr   c              #     K   t                      }t          j        }g }d}t          |           }g }||k     r d}| |         }	||v r6|dz  }|	t          k    r%|r#|                    t          |	|	                     K|	|v r|dz   }
||	         }|
|k     r2| |
         |v r(|
|v rn#|| |
                  }|
dz  }
|
|k     r
| |
         |v (d|v rN|d         }| ||
         }t          |||
|          }|
dz
  }d}|                    t          ||                     n|	t          k    r|r|d         j        |v r|dk    r| |dz
           |v r|                    |           ||d         j                 d         t          j	        d         k    rD|t          d	 |d
d         D                       z
  }| |         t          k    r	|dz  }|d= n&|d
d= n |t          |d         j                  z
  }|d= |r
|E d{V  g }|s/|	dk    r)|	dk    r#|                    t          |	|	                     |dz  }||k      |E d{V  dS )a  
    Finds unicode emoji in a string. Yields all normal characters as a named
    tuple :class:`Token` ``(char, char)`` and all emoji as :class:`Token` ``(chars, EmojiMatch)``.

    :param string: String contains unicode characters. MUST BE UNICODE.
    :param keep_zwj: Should ZWJ-characters (``\u200D``) that join non-RGI emoji be
        skipped or should be yielded as normal characters
    :return: An iterable of tuples :class:`Token` ``(char, char)`` or :class:`Token` ``(chars, EmojiMatch)``
    r   Fr>   r   TrY   status	componentc              3   >   K   | ]}t          |j                  V  d S r   )r@   r^   )rK   ts     r   rL   ztokenize.<locals>.<genexpr>   s*      >>QCLL>>>>>>r   Nu   ︎u   ️)get_search_treer	   rA   r@   r%   rC   r   r
   r^   STATUSsum)ra   rb   treerA   resultrD   lengthignoreconsumedcharjsub_treer"   code_points	match_objs                  r   r   r      s      D)JF	A[[F 	  f**ay;;FAt|||eD$//000T\\AADzHf**h!6!6;;#F1I.Q	 f**h!6!6
 !!#F+$QqSk
 '{Aq(CC	EeK;;<<< DLL r
 J..Aq1u%% MM!6":+,X6 '45 5
 >>&+>>>>>>!9$$FAr

 rss F2J,---2J 	F 	-DH,,1A1AMM%d++,,,	QE f**H r   matches
emoji_only
join_emojic           
   #   \  K   |s|s
| E d{V  dS |s| D ]}|j         t          k    r|V  dS d}d}d}g }| D ]j}|}|r|j        t          k    rd}t          |j        t                    r!|r|r t          |d         j        t
                    r_|d         j                            |j                   t          |d         j         t          z   |j         z   |d         j                  |d<   n|                                }t          |j        t                    sJ |	                    t          |j         t          z   |j         z   t          |j        |j                                       n|	                    |           d}d}Vd}d}|E d{V  |s|V  g }l|E d{V  dS )ap  
    Filters the output of `tokenize()`

    :param matches: An iterable of tuples of the form ``(match_str, result)``
        where ``result`` is either an EmojiMatch or a string.
    :param emoji_only: If True, only EmojiMatch are returned in the output.
        If False all characters are returned
    :param join_emoji: If True, multiple EmojiMatch are merged into
        a single :class:`EmojiMatchZWJNonRGI` if they are separated only by a ZWJ.

    :return: An iterable of tuples :class:`Token` ``(char, char)``,
        :class:`Token` ``(chars, EmojiMatch)`` or :class:`Token` ``(chars, EmojiMatchZWJNonRGI)``
    NFTrY   )
r^   r%   r_   
isinstancer
   r   r\   r   poprC   )	rv   rw   rx   tokenprevious_is_emojiprevious_is_zwjpre_previous_is_emojiaccumulatorprevs	            r   r   r      s1     "  j   	 	E{d"" O!!K     1 	!4!4"OOZ00 	$ * *k"o35HII O)..u{;;;&+#B-4u{B#B-' 'KOO
 '??,,D%dj*=====&& J-;/
EKHH     ""5))) $#OO !&#O"""""""" KKr   c                      t           sit          j        D ]\} t           }t          |           dz
  }t	          |           D ]1\  }}||vri ||<   ||         }||k    rt          j        |          |d<   2]t           S )u  
    Generate a search tree for demojize().
    Example of a search tree::

        EMOJI_DATA =
        {'a': {'en': ':Apple:'},
        'b': {'en': ':Bus:'},
        'ba': {'en': ':Bat:'},
        'band': {'en': ':Beatles:'},
        'bandit': {'en': ':Outlaw:'},
        'bank': {'en': ':BankOfEngland:'},
        'bb': {'en': ':BB-gun:'},
        'c': {'en': ':Car:'}}

        _SEARCH_TREE =
        {'a': {'data': {'en': ':Apple:'}},
        'b': {'a': {'data': {'en': ':Bat:'},
                    'n': {'d': {'data': {'en': ':Beatles:'},
                                'i': {'t': {'data': {'en': ':Outlaw:'}}}},
                        'k': {'data': {'en': ':BankOfEngland:'}}}},
            'b': {'data': {'en': ':BB-gun:'}},
            'data': {'en': ':Bus:'}},
        'c': {'data': {'en': ':Car:'}}}

                   _SEARCH_TREE
                 /     |        ⧵
               /       |          ⧵
            a          b             c
            |        / |  ⧵          |
            |       /  |    ⧵        |
        :Apple:   ba  :Bus:  bb     :Car:
                 /  ⧵         |
                /    ⧵        |
              :Bat:    ban     :BB-gun:
                     /     ⧵
                    /       ⧵
                 band       bank
                /   ⧵         |
               /     ⧵        |
            bandi :Beatles:  :BankOfEngland:
               |
            bandit
               |
           :Outlaw:


    r>   r   )r   r	   rA   r@   	enumerate)emjrs   lastidxrD   rq   s        r   ri   ri   >  s    `  	E + 	E 	EC#H#hhlG$S>> E E4x''%'HTN#D><<'4'?'DHV$E r   N)r4   typingr   r   r   r   r   r   r   r	   __all__r%   r   r6   r`   r
   r   r   r   r8   r   r   ri   r9   r   r   <module>r      s     @ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?         !d38n ! ! !:S :S :S :S :S :S :S :Sz!T !T !T !T !TJ !T !T !TH    -   2" " " " "J " " "XS XD XXe_ X X X XvBe_B*.B<@Be_B B B BJ:c3h : : : : : :r   