
    Ng                     v    d dl mZ d dlmZ d dlmZmZ ej        Z eedddd          Z	de	_
        d	 Zd
 ZdS )    )partial)chain)
everygramspad_sequenceTz<s>z</s>)pad_leftleft_pad_symbol	pad_rightright_pad_symbolzPads both ends of a sentence to length specified by ngram order.

    Following convention <s> pads the start of sentence </s> pads its end.
    c                 \    t          t          t          ||                     |           S )zpHelper with some useful defaults.

    Applies pad_both_ends to sentence and follows it up with everygrams.
    nmax_len)r   listpad_both_ends)ordersentences     Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/lm/preprocessing.pypadded_everygramsr      s,    
 d=U;;;<<eLLLL    c                      t          t                      fd|D             t          t          |                    fS )a  Default preprocessing for a sequence of sentences.

    Creates two iterators:

    - sentences padded and turned into sequences of `nltk.util.everygrams`
    - sentences padded as above and chained together for a flat stream of words

    :param order: Largest ngram length produced by `everygrams`.
    :param text: Text to iterate over. Expected to be an iterable of sentences.
    :type text: Iterable[Iterable[str]]
    :return: iterator over text as ngrams, iterator over text as vocabulary data
    r   c              3   f   K   | ]+}t          t           |                               V  ,dS )r   N)r   r   ).0sentr   
padding_fns     r   	<genexpr>z,padded_everygram_pipeline.<locals>.<genexpr>1   sB      LLtDD))**E	:	:	:LLLLLLr   )r   r   flattenmap)r   textr   s   ` @r   padded_everygram_pipeliner    "   sQ     %000JLLLLLtLLLJ%%&& r   N)	functoolsr   	itertoolsr   	nltk.utilr   r   from_iterabler   r   __doc__r   r     r   r   <module>r'      s                . . . . . . . .

   M M M    r   