
    Ng                     ~    d Z ddlmZmZ ddlmZmZmZ ddlm	Z	 ddl
mZ  G d de          Z G d d	e          Zd
S )z
Tokenizer Interface
    )ABCabstractmethod)IteratorListTuple)
overridden)string_span_tokenizec                       e Zd ZdZededee         fd            Zdedee	e
e
f                  fdZdee         deee                  fdZdee         deee	e
e
f                           fdZd	S )

TokenizerIz
    A processing interface for tokenizing a string.
    Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
    sreturnc                 f    t          | j                  r|                     |g          d         S dS )zL
        Return a tokenized copy of *s*.

        :rtype: List[str]
        r   N)r   tokenize_sentsselfr   s     M/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/tokenize/api.pytokenizezTokenizerI.tokenize   s<     d)** 	/&&s++A..	/ 	/    c                     t                      )z
        Identify the tokens using integer offsets ``(start_i, end_i)``,
        where ``s[start_i:end_i]`` is the corresponding token.

        :rtype: Iterator[Tuple[int, int]]
        NotImplementedErrorr   s     r   span_tokenizezTokenizerI.span_tokenize$   s     "###r   stringsc                        fd|D             S )z
        Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

            return [self.tokenize(s) for s in strings]

        :rtype: List[List[str]]
        c                 :    g | ]}                     |          S  )r   ).0r   r   s     r   
<listcomp>z-TokenizerI.tokenize_sents.<locals>.<listcomp>5   s%    222Qa  222r   r   )r   r   s   ` r   r   zTokenizerI.tokenize_sents-   s     3222'2222r   c              #   \   K   |D ]&}t          |                     |                    V  'dS )z
        Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

            return [self.span_tokenize(s) for s in strings]

        :yield: List[Tuple[int, int]]
        N)listr   )r   r   r   s      r   span_tokenize_sentszTokenizerI.span_tokenize_sents7   sF        	. 	.At))!,,------	. 	.r   N)__name__
__module____qualname____doc__r   strr   r   r   r   intr   r   r!   r   r   r   r   r      s         
 /# /$s) / / / ^/$s $xc3h'@ $ $ $ $3d3i 3DcO 3 3 3 3.Cy.	$uS#X'	(. . . . . .r   r   c                   D    e Zd ZdZeed                         Zd Zd ZdS )StringTokenizerzxA tokenizer that divides a string into substrings by splitting
    on the specified string (defined in subclasses).
    c                     t           Nr   )r   s    r   _stringzStringTokenizer._stringJ   s
     "!r   c                 6    |                     | j                  S r+   )splitr,   r   s     r   r   zStringTokenizer.tokenizeO   s    wwt|$$$r   c              #   @   K   t          || j                  E d {V  d S r+   )r	   r,   r   s     r   r   zStringTokenizer.span_tokenizeR   s0      '4<88888888888r   N)	r"   r#   r$   r%   propertyr   r,   r   r   r   r   r   r)   r)   E   sa          " " ^ X"% % %9 9 9 9 9r   r)   N)r%   abcr   r   typingr   r   r   nltk.internalsr   nltk.tokenize.utilr	   r   r)   r   r   r   <module>r5      s     $ # # # # # # # ( ( ( ( ( ( ( ( ( ( % % % % % % 3 3 3 3 3 3.. .. .. .. .. .. .. ..b9 9 9 9 9j 9 9 9 9 9r   