
    Ng(                         d Z ddlmZ ddlmZ ddlmZmZmZm	Z	  G d de          Z
d Zedk    r e             d	S d	S )
a  
A classifier based on the Naive Bayes algorithm.  In order to find the
probability for a label, this algorithm first uses the Bayes rule to
express P(label|features) in terms of P(label) and P(features|label):

|                       P(label) * P(features|label)
|  P(label|features) = ------------------------------
|                              P(features)

The algorithm then makes the 'naive' assumption that all features are
independent, given the label:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                                         P(features)

Rather than computing P(features) explicitly, the algorithm just
calculates the numerator for each label, and normalizes them so they
sum to one:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
    )defaultdict)ClassifierI)DictionaryProbDistELEProbDistFreqDistsum_logsc                   T    e Zd ZdZd Zd Zd Zd ZddZdd	Z	e
efd
            ZdS )NaiveBayesClassifiera  
    A Naive Bayes classifier.  Naive Bayes classifiers are
    paramaterized by two probability distributions:

      - P(label) gives the probability that an input will receive each
        label, given no information about the input's features.

      - P(fname=fval|label) gives the probability that a given feature
        (fname) will receive a given value (fval), given that the
        label (label).

    If the classifier encounters an input with a feature that has
    never been seen with any label, then rather than assigning a
    probability of 0 to all labels, it will ignore that feature.

    The feature value 'None' is reserved for unseen feature values;
    you generally should not use 'None' as a feature value for one of
    your own features.
    c                 n    || _         || _        t          |                                          | _        dS )a=  
        :param label_probdist: P(label), the probability distribution
            over labels.  It is expressed as a ``ProbDistI`` whose
            samples are labels.  I.e., P(label) =
            ``label_probdist.prob(label)``.

        :param feature_probdist: P(fname=fval|label), the probability
            distribution for feature values, given labels.  It is
            expressed as a dictionary whose keys are ``(label, fname)``
            pairs and whose values are ``ProbDistI`` objects over feature
            values.  I.e., P(fname=fval|label) =
            ``feature_probdist[label,fname].prob(fval)``.  If a given
            ``(label,fname)`` is not a key in ``feature_probdist``, then
            it is assumed that the corresponding P(fname=fval|label)
            is 0 for all values of ``fval``.
        N)_label_probdist_feature_probdistlistsamples_labels)selflabel_probdistfeature_probdists      T/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/classify/naivebayes.py__init__zNaiveBayesClassifier.__init__@   s3    "  .!1N224455    c                     | j         S N)r   )r   s    r   labelszNaiveBayesClassifier.labelsU   s
    |r   c                 P    |                      |                                          S r   )prob_classifymax)r   
featuresets     r   classifyzNaiveBayesClassifier.classifyX   s"    !!*--11333r   c                    |                                 }t          |                                          D ]}| j        D ]}||f| j        v r n||= i }| j        D ]}| j                            |          ||<    | j        D ]w}|                                D ]`\  }}||f| j        v r3| j        ||f         }||xx         |                    |          z  cc<   C||xx         t          g           z  cc<   axt          |dd          S )NT)	normalizelog)
copyr   keysr   r   r   logprobitemsr   r   )r   r   fnamelabelr$   fvalfeature_probss          r   r   z"NaiveBayesClassifier.prob_classify[   sY     __&&
*//++,, 	& 	&E & &5>T%;;;E < u% \ 	A 	AE!199%@@GENN \ 		3 		3E)//11 3 3t5>T%;;;$($:5%<$HMENNNm&;&;D&A&AANNNN
 ENNNhrll2NNNN3 "'TtDDDDr   
   c                 J  	 | j         t          d           |                     |          D ]\  fd	t          fd| j        D             	fdd          }t          |          dk    rH|d         }|d	         }|f                                       dk    rd
}n>d|f                                       |f                                       z  z  }t          ddddd|z  d d         ddd|z  d d         dd|d
           d S )NzMost Informative Featuresc                 >    | f                                        S r   )prob)lcpdistr&   r(   s    r   	labelprobzFNaiveBayesClassifier.show_most_informative_features.<locals>.labelprob   s     ah',,T222r   c              3   X   K   | ]$}|f                                          v  |V  %d S r   )r   ).0r.   r/   r&   r(   s     r   	<genexpr>zFNaiveBayesClassifier.show_most_informative_features.<locals>.<genexpr>   sB      OOqDF1e84D4L4L4N4N,N,N,N,N,N,NOOr   c                       |            | fS r    )elementr0   s    r   <lambda>zENaiveBayesClassifier.show_most_informative_features.<locals>.<lambda>   s    ii&8&8%8'$B r   T)keyreverse   r   INFz%8.1fz>24z = 14 z%s   z>6z : 6z : 1.0)r   printmost_informative_featuressortedr   lenr-   )
r   nr   l0l1ratior/   r&   r(   r0   s
         @@@@r   show_most_informative_featuresz3NaiveBayesClassifier.show_most_informative_features|   s   ')***99!<< 	 	KE43 3 3 3 3 3 3 OOOOOODLOOOBBBB  F
 6{{aBBb%i %%d++q002u9%**4006"e)3D3I3I$3O3OO 555$$$$BQB$)RaR%%%I   )	 	r   d   c                 n  	
 t          | d          r| j        d|         S t                      }t          t                    	t          d           
| j                                        D ]\  \  }}}|                                D ]}||f}|                    |           |	                    |          }t          |	|                   	|<   t          |
|                   
|<   
|         dk    r|                    |           t          |	
fd          | _        | j        d|         S )a  
        Return a list of the 'most informative' features used by this
        classifier.  For the purpose of this function, the
        informativeness of a feature ``(fname,fval)`` is equal to the
        highest value of P(fname=fval|label), for any label, divided by
        the lowest value of P(fname=fval|label), for any label:

        |  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
        _most_informative_featuresNc                      dS )Ng      ?r5   r5   r   r   r7   z@NaiveBayesClassifier.most_informative_features.<locals>.<lambda>   s    # r   r   c                     |          |          z  | d         | d         dv t          | d                                                   fS )Nr   r:   )NFT)strlower)feature_maxprobminprobs    r   r7   z@NaiveBayesClassifier.most_informative_features.<locals>.<lambda>   sK    H%(99QKQK#66$$**,,	& r   )r8   )hasattrrL   setr   floatr   r%   r   addr-   r   mindiscardrC   )r   rE   featuresr'   r&   probdistr(   featureprR   rS   s            @@r   rB   z.NaiveBayesClassifier.most_informative_features   sg    4566 	22A266 uuH "%((G!++..G,0,B,H,H,J,J 2 2($,,.. 2 2D$dmGLL))) d++A'*1gg.>'?'?GG$'*1gg.>'?'?GG$w'1,, ((1112 /5    / / /D+ .rr22r   c                    t                      }t          t                     }t          t                    }t                      }|D ]w\  }}||xx         dz  cc<   |                                D ]M\  }	}
|||	f         |
xx         dz  cc<   ||	                             |
           |                    |	           Nx|D ]j}||         }|D ]]}	|||	f                                         }||z
  dk    r6|||	f         dxx         ||z
  z  cc<   ||	                             d           ^k ||          }i }|                                D ]/\  \  }}	} ||t          ||	                             }||||	f<   0 | ||          S )z
        :param labeled_featuresets: A list of classified featuresets,
            i.e., a list of tuples ``(featureset, label)``.
        r:   r   N)bins)r   r   rU   r%   rW   NrD   )clslabeled_featuresets	estimatorlabel_freqdistfeature_freqdistfeature_valuesfnamesr   r'   r&   r(   num_samplescountr   r   freqdistr[   s                    r   trainzNaiveBayesClassifier.train   s    "&x00$S)) "5 	" 	"J5!!!Q&!!!)//11 " "t .t4449444u%))$///

5!!!!" $ 	4 	4E(/K 4 4(688:: &**$UE\24888K%<OO888"5)--d3334 #>22 (8(>(>(@(@ 	6 	6$NUEH yN54I0J0JKKKH-5UE\**s>#3444r   N)r*   )rJ   )__name__
__module____qualname____doc__r   r   r   r   rI   rB   classmethodr   rk   r5   r   r   r
   r
   +   s         (6 6 6*  4 4 4E E EB   <)3 )3 )3 )3V 2= .5 .5 .5 [.5 .5 .5r   r
   c                  d    ddl m}   | t          j                  }|                                 d S )Nr   )
names_demo)nltk.classify.utilrr   r
   rk   rI   )rr   
classifiers     r   demoru      s?    ------0677J--/////r   __main__N)ro   collectionsr   nltk.classify.apir   nltk.probabilityr   r   r   r   r
   ru   rl   r5   r   r   <module>rz      s    2 $ # # # # # ) ) ) ) ) ) P P P P P P P P P P P PI5 I5 I5 I5 I5; I5 I5 I5b0 0 0 zDFFFFF r   