"""Language Model Interface."""

import random
import warnings
from abc import ABCMeta, abstractmethod
from bisect import bisect
from itertools import accumulate

from nltk.lm.counter import NgramCounter
from nltk.lm.util import log_base2
from nltk.lm.vocabulary import Vocabulary


class Smoothing(metaclass=ABCMeta):
    """Ngram Smoothing Interface

    Implements Chen & Goodman 1995's idea that all smoothing algorithms have
    certain features in common. This should ideally allow smoothing algorithms to
    work both with Backoff and Interpolation.
    """

    def __init__(self, vocabulary, counter):
        """
        :param vocabulary: The Ngram vocabulary object.
        :type vocabulary: nltk.lm.vocab.Vocabulary
        :param counter: The counts of the vocabulary items.
        :type counter: nltk.lm.counter.NgramCounter
        N)vocabcounts)self
vocabularycounters      G/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/lm/api.py__init__zSmoothing.__init__   s      
    c                     t                      NNotImplementedError)r   words     r   unigram_scorezSmoothing.unigram_score&       !###r   c                     t                      r   r   r   r   contexts      r   alpha_gammazSmoothing.alpha_gamma*   r   r   N)__name__


def _mean(items):
    """Return average (aka mean) for sequence of items."""
    return sum(items) / len(items)


def _random_generator(seed_or_generator):
    if isinstance(seed_or_generator, random.Random):
        return seed_or_generator
    return random.Random(seed_or_generator)


def _weighted_choice(population, weights, random_generator=None):
    """Like random.choice, but with weights.

    Heavily inspired by python 3.6 `random.choices`.
    """
    if not population:
        raise ValueError("Can't choose from empty population")
    if len(population) != len(weights):
        raise ValueError("The number of weights does not match the population")
    cum_weights = list(accumulate(weights))
    total = cum_weights[-1]
    threshold = random_generator.random()
    return population[bisect(cum_weights, total * threshold)]


class LanguageModel(metaclass=ABCMeta):
    """ABC for Language Models.

    Cannot be directly instantiated itself.

    """

    def __init__(self, order, vocabulary=None, counter=None):
        """Creates new LanguageModel.

        :param order: Highest ngram order of the model, e.g. 2 for a bigram model.
        :type order: int
        :param vocabulary: If provided, this vocabulary will be used instead
            of creating a new one when training.
        :type vocabulary: `nltk.lm.Vocabulary` or None
        :param counter: If provided, use this object to count ngrams.
        :type counter: `nltk.lm.NgramCounter` or None
        """
        self.order = order
        if vocabulary and not isinstance(vocabulary, Vocabulary):
            warnings.warn(
                f"The `vocabulary` argument passed to {self.__class__.__name__} "
                "must be an instance of `nltk.lm.Vocabulary`.",
                stacklevel=3,
            )
        self.vocab = Vocabulary() if vocabulary is None else vocabulary
        self.counts = NgramCounter() if counter is None else counter
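
    # Usage note: this abstract base is not instantiated directly. Concrete
    # subclasses such as `nltk.lm.MLE` pass `order` through this constructor:
    #
    #     >>> from nltk.lm import MLE
    #     >>> MLE(2).order
    #     2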

    def fit(self, text, vocabulary_text=None):
        """Trains the model on a text.

        :param text: Training text as a sequence of sentences.

        """
        if not self.vocab:
            if vocabulary_text is None:
                raise ValueError(
                    "Cannot fit without a vocabulary or text to create it from."
                )
            self.vocab.update(vocabulary_text)
        self.counts.update(self.vocab.lookup(sent) for sent in text)
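
    # A sketch of a typical training call. It assumes the real
    # `nltk.lm.preprocessing.padded_everygram_pipeline` helper, which yields
    # both the ngram stream and the flat vocabulary text this method expects;
    # `lm` stands in for any concrete model instance.
    #
    #     >>> from nltk.lm.preprocessing import padded_everygram_pipeline
    #     >>> train, vocab_text = padded_everygram_pipeline(2, [["a", "b", "c"]])
    #     >>> lm.fit(train, vocab_text)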

    def score(self, word, context=None):
        """Masks out of vocab (OOV) words and computes their model score.

        For model-specific logic of calculating scores, see the `unmasked_score`
        method.
        """
        return self.unmasked_score(
            self.vocab.lookup(word), self.vocab.lookup(context) if context else None
        )

    @abstractmethod
    def unmasked_score(self, word, context=None):
        """Score a word given some optional context.

        Concrete models are expected to provide an implementation.
        Note that this method does not mask its arguments with the OOV label.
        Use the `score` method for that.

        :param str word: Word for which we want the score
        :param tuple(str) context: Context the word is in.
            If `None`, compute unigram score.
        :type context: tuple(str) or None
        :rtype: float
        """
        raise NotImplementedError()

    def logscore(self, word, context=None):
        """Evaluate the log score of this word in this context.

        The arguments are the same as for `score` and `unmasked_score`.

        """
        return log_base2(self.score(word, context))

    def context_counts(self, context):
        """Helper method for retrieving counts for a given context.

        Assumes context has been checked and oov words in it masked.
        :type context: tuple(str) or None

        """
        return (
            self.counts[len(context) + 1][context] if context else self.counts.unigrams
        )

    def entropy(self, text_ngrams):
        """Calculate cross-entropy of model for given evaluation text.

        This implementation is based on the Shannon-McMillan-Breiman theorem,
        as used and referenced by Dan Jurafsky and Jordan Boyd-Graber.

        :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
        :rtype: float

        """
        return -1 * _mean(
            [self.logscore(ngram[-1], ngram[:-1]) for ngram in text_ngrams]
        )

    def perplexity(self, text_ngrams):
        """Calculates the perplexity of the given text.

        This is simply 2 ** cross-entropy for the text, so the arguments are the same.

        """
        return pow(2.0, self.entropy(text_ngrams))
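
    # Worked example with hypothetical numbers: if a bigram model assigns each
    # of four test bigrams probability 0.25, every logscore is -2.0, entropy
    # comes out to 2.0 bits, and perplexity is 2 ** 2.0 == 4.0.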

    def generate(self, num_words=1, text_seed=None, random_seed=None):
        """Generate words from the model.

        :param int num_words: How many words to generate. By default 1.
        :param text_seed: Generation can be conditioned on preceding context.
        :param random_seed: A random seed or an instance of `random.Random`. If provided,
            makes the random sampling part of generation reproducible.
        :return: One (str) word or a list of words generated from model.

        Examples:

        >>> from nltk.lm import MLE
        >>> lm = MLE(2)
        >>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=['a', 'b', 'c'])
        >>> lm.fit([[("a",), ("b",), ("c",)]])
        >>> lm.generate(random_seed=3)
        'a'
        >>> lm.generate(text_seed=['a'])
        'b'

        """
        text_seed = [] if text_seed is None else list(text_seed)
        random_generator = _random_generator(random_seed)
        if num_words == 1:
            context = (
                text_seed[-self.order + 1 :]
                if len(text_seed) >= self.order
                else text_seed
            )
            samples = self.context_counts(self.vocab.lookup(context))
            while context and not samples:
                context = context[1:] if len(context) > 1 else []
                samples = self.context_counts(self.vocab.lookup(context))
            # Sorting samples achieves two things:
            # - reproducible randomness when sampling
            # - turns Mapping into Sequence which `_weighted_choice` expects
            samples = sorted(samples)
            return _weighted_choice(
                samples,
                tuple(self.score(w, context) for w in samples),
                random_generator,
            )
        # We build up text one word at a time using the preceding context.
        generated = []
        for _ in range(num_words):
            generated.append(
                self.generate(
                    num_words=1,
                    text_seed=text_seed + generated,
                    random_seed=random_generator,
                )
            )
        return generated