
    NgB                     l    d Z ddlZddlmZ 	 ddlZn# e$ r dZY nw xY wdad
dZddZddZ	d Z
d	 ZdS )aP  
A set of functions used to interface with the external megam_ maxent
optimization package. Before megam can be used, you should tell NLTK where it
can find the megam binary, using the ``config_megam()`` function. Typical
usage:

    >>> from nltk.classify import megam
    >>> megam.config_megam() # pass path to megam if not found in PATH # doctest: +SKIP
    [Found megam: ...]

Use with MaxentClassifier. Example below, see MaxentClassifier documentation
for details.

    nltk.classify.MaxentClassifier.train(corpus, 'megam')

.. _megam: https://www.umiacs.umd.edu/~hal/megam/index.html
    N)find_binaryc                 4    t          d| dgg dd          adS )aA  
    Configure NLTK's interface to the ``megam`` maxent optimization
    package.

    :param bin: The full path to the ``megam`` binary.  If not specified,
        then nltk will search the system for a ``megam`` binary; and if
        one is not found, it will raise a ``LookupError`` exception.
    :type bin: str
    megamMEGAM)z	megam.optr   	megam_686zmegam_i686.optz0https://www.umiacs.umd.edu/~hal/megam/index.html)env_varsbinary_namesurlN)r   
_megam_bin)bins    O/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/classify/megam.pyconfig_megamr   )   s3     JJJ>  JJJ    Tc                 8  	                                  }d t          |          D             }| D ]\  	t          d          r7|                    d                    	fd|D                                  n|                    d|	         z             |s&t                              	          ||           n?|D ]<}|                    d           t                              |          ||           =|                    d           dS )	a  
    Generate an input file for ``megam`` based on the given corpus of
    classified tokens.

    :type train_toks: list(tuple(dict, str))
    :param train_toks: Training data, represented as a list of
        pairs, the first member of which is a feature dictionary,
        and the second of which is a classification label.

    :type encoding: MaxentFeatureEncodingI
    :param encoding: A feature encoding, used to convert featuresets
        into feature vectors. May optionally implement a cost() method
        in order to assign different costs to different class predictions.

    :type stream: stream
    :param stream: The stream to which the megam input file should be
        written.

    :param bernoulli: If true, then use the 'bernoulli' format.  I.e.,
        all joint features have binary values, and are listed iff they
        are true.  Otherwise, list feature values explicitly.  If
        ``bernoulli=False``, then you must call ``megam`` with the
        ``-fvals`` option.

    :param explicit: If true, then use the 'explicit' format.  I.e.,
        list the features that would fire for any of the possible
        labels, for each token.  If ``explicit=True``, then you must
        call ``megam`` with the ``-explicit`` option.
    c                     i | ]\  }}||	S  r   ).0ilabels      r   
<dictcomp>z$write_megam_file.<locals>.<dictcomp>b   s    ===Zaq===r   cost:c              3   `   K   | ](}t                              |                    V  )d S N)strr   )r   lencoding
featuresetr   s     r   	<genexpr>z#write_megam_file.<locals>.<genexpr>i   s;      RRaX]]:ua@@AARRRRRRr   z%dz #
N)labels	enumeratehasattrwritejoin_write_megam_featuresencode)

train_toksr   stream	bernoulliexplicitr!   labelnumr   r   r   s
    `      @@r   write_megam_filer-   B   sR   > __F==9V+<+<===H (  
E8V$$ 	1LLRRRRRR6RRRRR    LL/000  	Y!(//*e"D"DfiXXXX
  Y YT"""%hooj!&D&DfiXXXX 	T- r   c                 `   t           t          d          |s
J d            |                                                     d          }t          j        |d          }|D ]L}|                                r6|                                \  }}t          |          |t          |          <   M|S )z
    Given the stdout output generated by ``megam`` when training a
    model, return a ``numpy`` array containing the corresponding weight
    vector.  This function does not currently handle bias features.
    Nz.This function requires that numpy be installedznon-explicit not supported yetr    d)numpy
ValueErrorstripsplitzerosfloatint)sfeatures_countr+   linesweightslinefidweights           r   parse_megam_weightsr>   ~   s     }IJJJ555555GGIIOOD!!Ek.#..G . .::<< 	.**,,KC %fGCHHNr   c                     | st          d          | D ]W\  }}|r5|dk    r|                    d|z             &|dk    rt          d          <|                    d| d|            Xd S )Nz:MEGAM classifier requires the use of an always-on feature.   z %sr   z3If bernoulli=True, then allfeatures must be binary. )r1   r$   )vectorr)   r*   r<   fvals        r   r&   r&      s     
K
 
 	
  	+ 	+	T 	+qyyUS[)))) L   
 LL)S))4))****	+ 	+r   c                    t          | t                    rt          d          t          t	                       t          g| z   }t          j        |t
          j                  }|                                \  }}|j	        dk    r,t                       t          |           t          d          t          |t                    r|S |                    d          S )z=
    Call the ``megam`` binary with the given arguments.
    z args should be a list of stringsN)stdoutr   zmegam command failed!zutf-8)
isinstancer   	TypeErrorr   r   
subprocessPopenPIPEcommunicate
returncodeprintOSErrordecode)argscmdprE   stderrs        r   
call_megamrT      s     $ <:;;; ,
CZ_555A}}VV 	|qf-...&# &}}W%%%r   r   )TT)T)__doc__rH   nltk.internalsr   r0   ImportErrorr   r   r-   r>   r&   rT   r   r   r   <module>rX      s    "     & & & & & &LLLL   EEE 
   29 9 9 9x   $+ + +"& & & & &s    