
    Ng                          d Z ddlmZ ddlmZmZmZ ddlmZm	Z	 ddl
mZmZ ddlmZ 	 ddZd	 Zd
 Z G d d          ZddZdS )z 
Utility functions for parsers.
    )load)CFGPCFGFeatureGrammar)ChartChartParser)FeatureChartFeatureChartParser)InsideChartParserNc                 f   t          | fi |}t          |t                    st          d          t          |t                    r|t
          } ||||          S t          |t                    r |t          }|t          } ||||          S |t          }|t          } ||||          S )a  
    Load a grammar from a file, and build a parser based on that grammar.
    The parser depends on the grammar format, and might also depend
    on properties of the grammar itself.

    The following grammar formats are currently supported:
      - ``'cfg'``  (CFGs: ``CFG``)
      - ``'pcfg'`` (probabilistic CFGs: ``PCFG``)
      - ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``)

    :type grammar_url: str
    :param grammar_url: A URL specifying where the grammar is located.
        The default protocol is ``"nltk:"``, which searches for the file
        in the the NLTK data package.
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        and higher numbers will produce more verbose tracing output.
    :param parser: The class used for parsing; should be ``ChartParser``
        or a subclass.
        If None, the class depends on the grammar format.
    :param chart_class: The class used for storing the chart;
        should be ``Chart`` or a subclass.
        Only used for CFGs and feature CFGs.
        If None, the chart class depends on the grammar format.
    :type beam_size: int
    :param beam_size: The maximum length for the parser's edge queue.
        Only used for probabilistic CFGs.
    :param load_args: Keyword parameters used when loading the grammar.
        See ``data.load`` for more information.
    z1The grammar must be a CFG, or a subclass thereof.N)trace	beam_size)r   chart_class)r   
isinstancer   
ValueErrorr   r   r   r
   r	   r   r   )grammar_urlr   parserr   r   	load_argsgrammars          K/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/parse/util.pyload_parserr      s    D ;,,),,Ggs## QOPPP'4   E>&FvgUi@@@@	G^	,	, E>'F&KvgUDDDD > FKvgUDDDD    c              #      K   t          | d          D ]=\  }\  }}t          |          |d||dddddg
}d                    |          dz   }|V  >dS )	a  
    A module to convert a single POS tagged sentence into CONLL format.

    >>> from nltk import word_tokenize, pos_tag
    >>> text = "This is a foobar sentence."
    >>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))): # doctest: +NORMALIZE_WHITESPACE
    ... 	print(line, end="")
        1	This	_	DT	DT	_	0	a	_	_
        2	is	_	VBZ	VBZ	_	0	a	_	_
        3	a	_	DT	DT	_	0	a	_	_
        4	foobar	_	JJ	JJ	_	0	a	_	_
        5	sentence	_	NN	NN	_	0	a	_	_
        6	.		_	.	.	_	0	a	_	_

    :param sentence: A single input sentence to parse
    :type sentence: list(tuple(str, str))
    :rtype: iter(str)
    :return: a generator yielding a single sentence in CONLL format.
       )start_0a	
N)	enumeratestrjoin)sentenceiwordtag	input_strs        r   taggedsent_to_conllr)   O   sw      ( $HA666  ;D#VVT3S#sCcJ	IIi((4/	 r   c              #   F   K   | D ]}t          |          E d{V  dV  dS )aV  
    A module to convert the a POS tagged document stream
    (i.e. list of list of tuples, a list of sentences) and yield lines
    in CONLL format. This module yields one line per word and two newlines
    for end of sentence.

    >>> from nltk import word_tokenize, sent_tokenize, pos_tag
    >>> text = "This is a foobar sentence. Is that right?"
    >>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
    >>> for line in taggedsents_to_conll(sentences): # doctest: +NORMALIZE_WHITESPACE
    ...     if line:
    ...         print(line, end="")
    1	This	_	DT	DT	_	0	a	_	_
    2	is	_	VBZ	VBZ	_	0	a	_	_
    3	a	_	DT	DT	_	0	a	_	_
    4	foobar	_	JJ	JJ	_	0	a	_	_
    5	sentence	_	NN	NN	_	0	a	_	_
    6	.		_	.	.	_	0	a	_	_
    <BLANKLINE>
    <BLANKLINE>
    1	Is	_	VBZ	VBZ	_	0	a	_	_
    2	that	_	IN	IN	_	0	a	_	_
    3	right	_	NN	NN	_	0	a	_	_
    4	?	_	.	.	_	0	a	_	_
    <BLANKLINE>
    <BLANKLINE>

    :param sentences: Input sentences to parse
    :type sentence: list(list(tuple(str, str)))
    :rtype: iter(str)
    :return: a generator yielding sentences in CONLL format.
    Nz

)r)   )	sentencesr$   s     r   taggedsents_to_conllr,   i   sM      B   &x000000000 r   c                   "    e Zd ZdZddZddZdS )TestGrammarz
    Unit tests for  CFG.
    Nc                 j    || _         t          |d          | _        || _        || _        || _        d S )Nr   )r   )test_grammarr   cpsuite_accept_reject)selfr   r2   acceptrejects        r   __init__zTestGrammar.__init__   s7    #gQ///
r   Fc                    | j         D ]}t          |d         dz   d           dD ]}||         D ]}|                                }t          | j                            |                    }|r3|r1t                       t          |           |D ]}t          |           |dk    r|g k    rt          d|z            d}|rt          d	|z            d}	|r|	rt          d
           dS )a}  
        Sentences in the test suite are divided into two classes:

        - grammatical (``accept``) and
        - ungrammatical (``reject``).

        If a sentence should parse according to the grammar, the value of
        ``trees`` will be a non-empty list. If a sentence should be rejected
        according to the grammar, then the value of ``trees`` will be None.
        doc: )end)r6   r7   r6   zSentence '%s' failed to parse'TzSentence '%s' received a parse'zAll tests passed!N)r2   printsplitlistr1   parser   )
r5   
show_treestestkeysenttokenstreestreeacceptedrejecteds
             r   runzTestGrammar.run   s<    J 	+ 	+D$u+#----+ , , I , ,D!ZZ\\F v!6!677E! (e (d$) ( (D!$KKKKh B;;",-MPT-T"U"UU'+HH  ,",-NQU-U"V"VV'+HH#,$  +H +)***-	+ 	+r   )NN)F)__name__
__module____qualname____doc__r8   rK    r   r   r.   r.      sF            !+ !+ !+ !+ !+ !+r   r.   #%;c                    ||                      |          } g }|                     d          D ]}|dk    s
|d         |v r|                    dd          }d}t          |          dk    r:|d         dv r|d         d	v }|d         }nt          |d                   }|d         }|                                }|g k    r|||fgz  }|S )
a  
    Parses a string with one test sentence per line.
    Lines can optionally begin with:

    - a bool, saying if the sentence is grammatical or not, or
    - an int, giving the number of parse trees is should have,

    The result information is followed by a colon, and then the sentence.
    Empty lines and lines beginning with a comment char are ignored.

    :return: a list of tuple of sentences and expected results,
        where a sentence is a list of str,
        and a result is None, or bool, or int

    :param comment_chars: ``str`` of possible comment characters.
    :param encoding: the encoding of the string, if it is binary
    Nr     r   r;   r      )TruetrueFalsefalse)rU   rV   )decoder?   lenint)stringcomment_charsencodingr+   r$   
split_inforesultrF   s           r   extract_test_sentencesra      s    $ x((ILL&& ( (r>>Xa[M99^^C++
z??a!} BBB#A*::%a=Z]++%a=!!R<<vv&''		r   )r   NNr   )rQ   N)rO   	nltk.datar   nltk.grammarr   r   r   nltk.parse.chartr   r   nltk.parse.featurechartr	   r
   nltk.parse.pchartr   r   r)   r,   r.   ra   rP   r   r   <module>rg      s	          2 2 2 2 2 2 2 2 2 2 / / / / / / / / D D D D D D D D / / / / / / DE6E 6E 6E 6Er  4# # #V.+ .+ .+ .+ .+ .+ .+ .+b% % % % % %r   