
    NgP/                         d Z ddlZddlZddlmZ ddZd Zd Zd Z	 G d d	          Z
d
 Zd ZefdZefdZi addZd ZdS )z0
Utility functions and classes for classifiers.
    N)LazyMapc                      |$|o!t          |d         t          t          f          }|r fd}t          ||          S t           |          S )a  
    Use the ``LazyMap`` class to construct a lazy list-like
    object that is analogous to ``map(feature_func, toks)``.  In
    particular, if ``labeled=False``, then the returned list-like
    object's values are equal to::

        [feature_func(tok) for tok in toks]

    If ``labeled=True``, then the returned list-like object's values
    are equal to::

        [(feature_func(tok), label) for (tok, label) in toks]

    The primary purpose of this function is to avoid the memory
    overhead involved in storing all the featuresets for every token
    in a corpus.  Instead, these featuresets are constructed lazily,
    as-needed.  The reduction in memory overhead can be especially
    significant when the underlying list of tokens is itself lazy (as
    is the case with many corpus readers).

    :param feature_func: The function that will be applied to each
        token.  It should return a featureset -- i.e., a dict
        mapping feature names to feature values.
    :param toks: The list of tokens to which ``feature_func`` should be
        applied.  If ``labeled=True``, then the list elements will be
        passed directly to ``feature_func()``.  If ``labeled=False``,
        then the list elements should be tuples ``(tok,label)``, and
        ``tok`` will be passed to ``feature_func()``.
    :param labeled: If true, then ``toks`` contains labeled tokens --
        i.e., tuples of the form ``(tok, label)``.  (Default:
        auto-detect based on types.)
    Nr   c                 6     | d                   | d         fS )Nr       )labeled_tokenfeature_funcs    N/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/classify/util.py	lazy_funcz!apply_features.<locals>.lazy_funcA   s"     Lq!122M!4DEE    )
isinstancetuplelistr   )r	   tokslabeledr   s   `   r
   apply_featuresr      so    B =:d1gt}== +	F 	F 	F 	F 	F y$'''|T***r   c                 4    t          d | D                       S )a!  
    :return: A list of all labels that are attested in the given list
        of tokens.
    :rtype: list of (immutable)
    :param tokens: The list of classified tokens from which to extract
        labels.  A classified token has the form ``(token, label)``.
    :type tokens: list
    c                     h | ]\  }}|S r   r   ).0toklabels      r
   	<setcomp>z"attested_labels.<locals>.<setcomp>R   s    333LS%%333r   )r   )tokenss    r
   attested_labelsr   I   s!     33F333444r   c                     |                      d |D                       }d t          ||          D             }t          j        t	          |          t          |          z            S )Nc                     g | ]\  }}|S r   r   r   fsls      r
   
<listcomp>z"log_likelihood.<locals>.<listcomp>V   s    ,D,D,DGRR,D,D,Dr   c                 D    g | ]\  \  }}}|                     |          S r   )prob)r   r   r   pdists       r
   r    z"log_likelihood.<locals>.<listcomp>W   s+    	C	C	C,Wb!e%**Q--	C	C	Cr   )prob_classify_manyzipmathlogsumlen)
classifiergoldresultslls       r
   log_likelihoodr.   U   sb    ++,D,Dt,D,D,DEEG	C	CD'0B0B	C	C	CB8CGGc"gg%&&&r   c                     |                      d |D                       }d t          ||          D             }|rt          |          t          |          z  S dS )Nc                     g | ]\  }}|S r   r   r   s      r
   r    zaccuracy.<locals>.<listcomp>\   s    '?'?'?wA'?'?'?r   c                 &    g | ]\  \  }}}||k    S r   r   )r   r   r   rs       r
   r    zaccuracy.<locals>.<listcomp>]   s$    ===,7BAqAv===r   r   )classify_manyr%   r(   r)   )r*   r+   r,   corrects       r
   accuracyr5   [   se    &&'?'?$'?'?'?@@G==#dG*<*<===G 7||c'll**qr   c                       e Zd ZdZd Zd ZdS )CutoffCheckerz
    A helper class that implements cutoff checks based on number of
    iterations and log likelihood.

    Accuracy cutoffs are also implemented, but they're almost never
    a good idea to use.
    c                     |                                 | _        d|v rt          |d                    |d<   d|v rt          |d                   |d<   d | _        d | _        d| _        d S )Nmin_llmin_lldeltar   )copycutoffsabsr-   acciter)selfr<   s     r
   __init__zCutoffChecker.__init__m   sq    ||~~w!$WX%6!7!7 7GHG##%()?%@%@GM"			r   c                 b   | j         }| xj        dz  c_        d|v r| j        |d         k    rdS t          j        j                            ||          }t          j        |          rdS d|v sd|v rGd|v r||d         k    rdS d|v r*| j        r#|| j        z
  t          |d                   k    rdS || _        d|v sd|v rnt          j        j                            ||          }d|v r||d         k    rdS d|v r*| j
        r#|| j
        z
  t          |d                   k    rdS || _
        dS d S )	Nr   max_iterTr9   r:   max_accmin_accdeltaF)r<   r?   nltkclassifyutilr.   r&   isnanr-   r=   r>   )r@   r*   
train_toksr<   new_llnew_accs         r
   checkzCutoffChecker.checkw   sy   ,		Q		  TY'*2E%E%E4#22:zJJ:f 	4w-7":":7""v1B'B'Bt((G )tw&3w}/E+F+FFFtDG>W#<#<m(77
JOOGG##793E(E(Et'))H *(S1H-I-IIItDH5 $=#<r   N)__name__
__module____qualname____doc__rA   rM   r   r   r
   r7   r7   d   s<           ! ! ! ! !r   r7   c                 $   i }d|d<   | d                                          |d<   | d                                          |d<   dD ]K}|                                                      |          |d|z  <   ||                                  v |d	|z  <   L|S )
NTalwaysonr   
startswithendswithabcdefghijklmnopqrstuvwxyz	count(%s)has(%s)lowercountnamefeaturesletters      r
   names_demo_featuresra      s    HHZ!!W]]__H\8>>++HZ. > >)-););F)C)Cv%&'-'=V#$$Or   c                    i }d|d<   | d                                          dv |d<   | d                                          dv |d<   dD ]}|                                                      |          |d	|z  <   ||                                  v |d
|z  <   || d                                          k    |d|z  <   || d                                          k    |d|z  <   |S )NTrS   r   aeiouyzstartswith(vowel)rU   zendswith(vowel)rW   rX   rY   zstartswith(%s)zendswith(%s)rZ   r]   s      r
   binary_names_demo_featuresrd      s    HHZ$(GMMOOx$?H !"&r(.."2"2h">H. G G)-););F)C)Cv%&'-'=V#$.4Q.G!F*+,2d2hnn6F6F,F&())Or   c           	         dd l }ddlm} d |                    d          D             d |                    d          D             z   }|                    d           |                    |           |d d         }|dd	         }t          d
            | fd|D                       }t          d           t          |fd|D                       }t          d|z             	 fd|D             }	|                    |	          }
d t          ||
          D             }t          dt          |          t          |          z  z             t                       t          d           t          t          ||
                    d d         D ]N\  \  }}}|dk    rd}nd}t          |||                    d          |                    d          fz             On# t          $ r Y nw xY w|S )Nr   namesc                     g | ]}|d fS )maler   r   r^   s     r
   r    znames_demo.<locals>.<listcomp>   s    CCC4vCCCr   male.txtc                     g | ]}|d fS )femaler   rj   s     r
   r    znames_demo.<locals>.<listcomp>   s0     G G G!xG G Gr   
female.txt@ i  i|  Training classifier...c                 0    g | ]\  }} |          |fS r   r   r   ngr_   s      r
   r    znames_demo.<locals>.<listcomp>   *    ???v188A;;*???r   Testing classifier...c                 0    g | ]\  }} |          |fS r   r   rr   s      r
   r    znames_demo.<locals>.<listcomp>   *    DDDVa!a 0DDDr   Accuracy: %6.4fc                 ,    g | ]\  }} |          S r   r   rr   s      r
   r    znames_demo.<locals>.<listcomp>   %    ;;;FQHHQKK;;;r   c                 D    g | ]\  \  }}}|                     |          S r   logprobr   r^   r+   r#   s       r
   r    znames_demo.<locals>.<listcomp>   -    QQQ&;|dUemmD!!QQQr   Avg. log likelihood: %6.4fMUnseen Names      P(Male)  P(Female)
----------------------------------------   ri     %-15s *%6.4f   %6.4f  %-15s  %6.4f  *%6.4frm   )randomnltk.corpusrg   wordsseedshuffleprintr5   r$   r%   r(   r)   r   r"   NotImplementedError)trainerr_   r   rg   namelisttraintestr*   r>   test_featuresetspdistsr-   r^   genderr#   fmts    `              r
   
names_demor      so   MMM!!!!!! DC5;;z+B+BCCC G G%*[[%>%>G G G H
 KK
NN8UdUOEDID 

"###???????@@J 

!"""
:DDDDtDDD
E
EC	
c
!""";;;;d;;;../?@@QQs4?P?PQQQ*c"ggD		.ABCCCABBB%)#dF*;*;%<%<RaR%@ 	J 	J!NT6E..#uzz&115::h3G3GHHIIII	J     s   -C=G+ +
G87G8c           	         dd l }ddlm} |                    d          }|                    d          }|                    d           |                    |           |                    |           t          |d d                   }t          |dd         |d d         z             }d	 |dd
         D             d |dd         D             z   }|                    |           t          d            | ||          }	t          d           t          |	fd|D                       }
t          d|
z             	 fd|D             }|		                    |          }d t          ||          D             }t          dt          |          t          |          z  z             t                       t          d           t          ||          d d         D ]N\  \  }}}|dk    rd}nd}t          |||                    d          |                    d          fz             On# t          $ r Y nw xY w|	S )Nr   rf   rk   rn   i	 i  i	  i  c                     g | ]}|d fS )Tr   rj   s     r
   r    z&partial_names_demo.<locals>.<listcomp>   s    ;;;TT4L;;;r   i
  c                     g | ]}|d fS )Fr   rj   s     r
   r    z&partial_names_demo.<locals>.<listcomp>   s)     ? ? ?u? ? ?r   i  rp   rv   c                 0    g | ]\  }} |          |fS r   r   r   rs   mr_   s      r
   r    z&partial_names_demo.<locals>.<listcomp>  rx   r   ry   c                 ,    g | ]\  }} |          S r   r   r   s      r
   r    z&partial_names_demo.<locals>.<listcomp>  r{   r   c                 D    g | ]\  \  }}}|                     |          S r   r}   r   s       r
   r    z&partial_names_demo.<locals>.<listcomp>  r   r   r   r   r   Tr   r   F)r   r   rg   r   r   r   mapr   r5   r$   r%   r(   r)   r"   r   )r   r_   r   rg   
male_namesfemale_namespositive	unlabeledr   r*   r>   r   r   r-   r^   is_maler#   r   s    `                r
   partial_names_demor      s   MMM!!!!!!Z((J;;|,,L
KK
NN:
NN<    8Z.//H Hjd3l4C46HHIII <;ZT	%:;;; ? ?".s3w"7? ? ? D NN4 

"###9--J 

!"""
:DDDDtDDD
E
EC	
c
!""";;;;d;;;../?@@QQs4?P?PQQQ*c"ggD		.ABCCCABBB&)$&7&7&; 	E 	E"OT7U$..#uzz$//E1B1BCCDDDD	E     s   C0H8 8
II  c                     dd l }ddlm} t          d           |t          vr'd |                    |          D             t          |<   t          |         d d          }|t          |          k    rt          |          }t          d |D                       }t          dd                    |          z              t          d           |	                    d	           |
                    |           |d t          d
|z                     }|t          d
|z            |         }	t          d            | fd|D                       }
t          d           t          |
fd|	D                       }t          d|z             	 fd|	D             }|
                    |          }d t          |	|          D             }t          dt          |          t          |	          z  z             n# t           $ r Y nw xY w|
S )Nr   )sensevalzReading data...c                 ,    g | ]}||j         d          fS )r   )senses)r   is     r
   r    zwsd_demo.<locals>.<listcomp>+  s#    PPP!a!-PPPr   c                     h | ]\  }}|S r   r   )r   r   r   s      r
   r   zwsd_demo.<locals>.<setcomp>/  s    ---!Q1---r   z
  Senses:  zSplitting into test & train...ro   g?rp   c                 0    g | ]\  }} |          |fS r   r   r   r   r   r_   s      r
   r    zwsd_demo.<locals>.<listcomp>;  ru   r   rv   c                 0    g | ]\  }} |          |fS r   r   r   s      r
   r    zwsd_demo.<locals>.<listcomp>?  rx   r   ry   c                 ,    g | ]\  }} |          S r   r   )r   r   rs   r_   s      r
   r    zwsd_demo.<locals>.<listcomp>E  r{   r   c                 D    g | ]\  \  }}}|                     |          S r   r}   r   s       r
   r    zwsd_demo.<locals>.<listcomp>G  r   r   r   )r   r   r   r   _inst_cache	instancesr)   r   joinr   r   intr5   r$   r%   r(   r   )r   wordr_   rs   r   r   r   r   r   r   r*   r>   r   r   r-   s     `            r
   wsd_demor   "  sN   MMM$$$$$$ 

;PPx7I7I$7O7OPPPDD!!!!$I3y>>	NN--9---..F	,&))
)*** 

*+++
KK
NN9nC!Gn%ESq\\A%&D 

"###???????@@J 

!"""
:DDDDtDDD
E
EC	
c
!""";;;;d;;;../?@@QQs4?P?PQQQ*c"ggD		.ABCCCC    s   A,G> >
H
Hc                  x    	 t            dS # t          $ r$} t          d          }t          |          | d} ~ ww xY w)z8
    Checks whether the MEGAM binary is configured.
    z\Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N)
_megam_bin	NameErrorstr)eerr_msgs     r
   check_megam_configr   P  sU    (


 ( ( (<
 
   a'(s    
949)N)r   )rQ   r&   nltk.classify.utilrF   	nltk.utilr   r   r   r.   r5   r7   ra   rd   r   r   r   r   r   r   r   r
   <module>r      s,              *+ *+ *+ *+Z	5 	5 	5' ' '  4 4 4 4 4 4 4 4x  
 
 
 "5 , , , ,^ *= 5 5 5 5p + + + +\( ( ( ( (r   