
    Ngw
                    .   U d dl mZ d dlZd dlmZ d dlmZmZmZ d dl	Z	d dl	m
Z d dl	mZ d dl	mZ dZd	ed
<   d ZddZ e            d             Z ee          dd            Z ee          dd            Z ee          dd            Z
dS )    )annotationsN)	lru_cache)FinalListTuple)pos_tag)sent_tokenize)word_tokenize   z
Final[int]CACHE_MAX_SIZEc                 ^    t          j        dd           t          j        dd           d S )Naveraged_perceptron_tagger_engT)quiet	punkt_tab)nltkdownload     U/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/nlp/tokenize.pydownload_nltk_packagesr      s3    M2$????M+T******r   package_namestrpackage_categoryreturnboolc                .   g }t           j        j        D ]L}|                    d          s t          j                            |d          }|                    |           M	 t          j        | d|  |           dS # t          t          f$ r Y dS w xY w)zEChecks to see if the specified NLTK package exists on the file system	nltk_data/)pathsTF)
r   datapathendswithosjoinappendfindLookupErrorOSError)r   r   r   r!   s       r   check_for_nltk_packager)      s    E	  }}[)) 	37<<k22DT	%6666eDDDDt!   uus   "A? ?BBc                 r    t          dd          } t          dd          }|r| st                       dS dS )z;If required NLTK packages are not available, download them.taggersr   )r   r   
tokenizersr   N)r)   r   )tagger_availabletokenizer_availables     r   &_download_nltk_packages_if_not_presentr/   %   si     ."5   1%K     !)9 !     ! !r   )maxsizetext	List[str]c                <    t                       t          |           S )zFA wrapper around the NLTK sentence tokenizer with LRU caching enabled.)r/   _sent_tokenizer1   s    r   r	   r	   5        +,,,$r   c                <    t                       t          |           S )zBA wrapper around the NLTK word tokenizer with LRU caching enabled.)r/   _word_tokenizer5   s    r   r
   r
   <   r6   r   List[Tuple[str, str]]c                    t                       t          |           }g }|D ]3}t          |          }|                    t	          |                     4|S )z>A wrapper around the NLTK POS tagger with LRU caching enabled.)r/   r4   r8   extend_pos_tag)r1   	sentencesparts_of_speechsentencetokenss        r   r   r   C   se     +,,, t$$I-/O 1 1))x//0000r   )r   r   r   r   r   r   )r1   r   r   r2   )r1   r   r   r9   )
__future__r   r#   	functoolsr   typingr   r   r   r   r   r<   r	   r4   r
   r8   r   __annotations__r   r)   r/   r   r   r   <module>rE      s{   " " " " " " " 				       % % % % % % % % % %  $ $ $ $ $ $ 0 0 0 0 0 0 0 0 0 0 0 0         + + +
   " ! ! ! >"""      #"  >"""      #"  >"""   #"  r   