
    NgI                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dZ G d d	e          Z G d
 de          Z G d de          Z G d de          ZdS )    N)PIPE)_java_optionsconfig_javafind_jar_iterfind_jars_within_pathjava)ParserIDependencyGraph)Treez2https://nlp.stanford.edu/software/lex-parser.shtmlc                   r    e Zd ZdZdZdZdZdZdZ	 	 	 	 	 	 	 ddZ	d Z
ddZddZddZddZddZddZdS )GenericStanfordParserz Interface to the Stanford Parserz+stanford-parser-(\d+)(\.(\d+))+-models\.jarzstanford-parser\.jarz3edu.stanford.nlp.parser.lexparser.LexicalizedParserFN4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzutf8-mx4g c                    t          t          | j        |ddt          |d          d           }t          t          | j        |ddt          |d          d           }	t
          j                            |          d	         }
t          |	gt          |
          z             | _
        || _        || _        || _        || _        d S )
N)STANFORD_PARSERSTANFORD_CORENLP T)env_vars
searchpathurlverboseis_regexc                 @    t           j                            |           S Nospathdirname
model_paths    O/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/parse/stanford.py<lambda>z0GenericStanfordParser.__init__.<locals>.<lambda>;       27??:#>#>     )key)STANFORD_MODELSr   c                 @    t           j                            |           S r   r   r"   s    r$   r%   z0GenericStanfordParser.__init__.<locals>.<lambda>H   r&   r'   r   )maxr   _JAR_stanford_url_MODEL_JAR_PATTERNr   r    splittupler   
_classpathr#   	_encodingcorenlp_optionsjava_options)selfpath_to_jarpath_to_models_jarr#   encodingr   r4   r3   stanford_jar	model_jarstanford_dirs              r$   __init__zGenericStanfordParser.__init__&   s     	@!   ?>
 
 
 '"@!   ?>
 
 
	  w}}\2215.CL.Q.Q QRR$!.(r'   c           
         g }g }g }d}|                     d          D ]}|dk    r|r'|                    t          |                     g }d}1| j        r@|                    |                     d                    |                               g }d}x|                    t          |                     d                    |                    g                     g }|                    |           d}t          |          S )NFr   
T)
splitlinesappenditer_DOUBLE_SPACED_OUTPUT
_make_treejoin)r5   output_res	cur_lines	cur_treesblanklines          r$   _parse_trees_outputz)GenericStanfordParser._parse_trees_outputV   s   		&&u-- 	 	Drzz 
#JJtI/// "I!EE/ #$$T__TYYy5I5I%J%JKKK "I EEJJtT__TYYy5I5I%J%J$KLLMMM "II  &&&Cyyr'   c           
          | j         d| j        ddd| j        dddg
}|                     |                     |d                    d	 |D                       |                    S )
a  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list where each sentence is a list of words.
        Each sentence will be automatically tagged with this StanfordParser instance's
        tagger.
        If whitespaces exists inside a token, then the token will be treated as
        separate tokens.

        :param sentences: Input sentences to parse
        :type sentences: list(list(str))
        :rtype: iter(iter(Tree))
        -model
-sentencesnewline-outputFormat
-tokenizedz-escaperz-edu.stanford.nlp.process.PTBEscapingProcessorr>   c              3   @   K   | ]}d                      |          V  dS ) NrD   ).0sentences     r$   	<genexpr>z4GenericStanfordParser.parse_sents.<locals>.<genexpr>   s.      LLhsxx11LLLLLLr'   _MAIN_CLASSr#   _OUTPUT_FORMATrK   _executerD   r5   	sentencesr   cmds       r$   parse_sentsz!GenericStanfordParser.parse_sentsm   s~     O;
 ''MMTYYLL)LLLLLg 
 
 	
r'   c                 J    t          |                     |g|                    S )a&  
        Use StanfordParser to parse a sentence. Takes a sentence as a string;
        before parsing, it will be automatically tokenized and tagged by
        the Stanford Parser.

        :param sentence: Input sentence to parse
        :type sentence: str
        :rtype: iter(Tree)
        )nextraw_parse_sentsr5   rV   r   s      r$   	raw_parsezGenericStanfordParser.raw_parse   s$     D(((W==>>>r'   c                     | j         d| j        ddd| j        g}|                     |                     |d                    |          |                    S )aI  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list of strings.
        Each sentence will be automatically tokenized and tagged by the Stanford Parser.

        :param sentences: Input sentences to parse
        :type sentences: list(str)
        :rtype: iter(iter(Tree))
        rM   rN   rO   rP   r>   rX   r\   s       r$   rb   z%GenericStanfordParser.raw_parse_sents   sa     O
 ''MM#tyy33W==
 
 	
r'   c                 J    t          |                     |g|                    S )a0  
        Use StanfordParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :rtype: iter(Tree)
        )ra   tagged_parse_sentsrc   s      r$   tagged_parsez"GenericStanfordParser.tagged_parse   s$     D++XJ@@AAAr'   c                     d| j         d| j        ddd| j        dddd	d
dg}|                     |                     |d                    fd|D                       |                    S )ad  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences
        where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :rtype: iter(iter(Tree))
        /rM   rN   rO   rP   rQ   z-tagSeparatorz-tokenizerFactoryz,edu.stanford.nlp.process.WhitespaceTokenizerz-tokenizerMethodnewCoreLabelTokenizerFactoryr>   c              3   Z   K   | ]%}d                      fd|D                       V  &dS )rS   c              3   B   K   | ]}                     |          V  d S r   rT   )rU   taggedtag_separators     r$   rW   zEGenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>.<genexpr>   s1      OOF]//77OOOOOOr'   NrT   )rU   rV   ro   s     r$   rW   z;GenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>   sX          HHOOOOhOOOOO     r'   rX   )r5   r]   r   r^   ro   s       @r$   rg   z(GenericStanfordParser.tagged_parse_sents   s     O:*
" ''MM		    $-      	
 	
 		
r'   c                    | j         }|                    d|g           | j        r,|                    | j                                                   d                    t
                    }t          | j        |           t          j	        dd          5 }t          |t                    r|r|                    |          }|                    |           |                                 | j        r<|                    d           t#          || j        |t&          t&                    \  }}n?|                    |j                   t#          || j        t&          t&          	          \  }}|                    d
d          }|                    dd          }|                    |          }d d d            n# 1 swxY w Y   t1          j        |j                   t          |d           |S )Nz	-encodingrS   )optionsr   wbF)modedeleter   )	classpathstdinstdoutstderr)ru   rw   rx   s        s    )r2   extendr3   r/   rD   r   r   r4   tempfileNamedTemporaryFile
isinstancestrencodewriteflush
_USE_STDINseekr   r1   r   r@   namereplacedecoder   unlink)	r5   r^   input_r   r8   default_options
input_filerw   rx   s	            r$   r[   zGenericStanfordParser._execute   s   >

K*+++ 	5JJt+1133444((=11 	D-w???? (d5AAA 	-Z&#&& 18 1x00V$$$  """!%"o$" " " 

:?+++!%4?4" " " ^^K66F^^K66F]]8,,F3	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-6 		*/""" 	OU;;;;s   DF==GG)NNr   r   Fr   r   F)__name__
__module____qualname____doc__r.   r,   rY   r   rB   r<   rK   r_   rd   rb   rh   rg   r[   r   r'   r$   r   r      s        **G"DGKJ! I.) .) .) .)`  .
 
 
 
>
? 
? 
? 
?
 
 
 
.
B 
B 
B 
B%
 %
 %
 %
N, , , , , ,r'   r   c                   ,     e Zd ZdZdZ fdZd Z xZS )StanfordParsera  
    >>> parser=StanfordParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
    ...     "the quick brown fox jumps over the lazy dog",
    ...     "the quick grey wolf jumps over the lazy fox"
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
    [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
    [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
    Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
    Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
    [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
    [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
    pennc                 r    t          j        dt          d            t                      j        |i | d S )NzcThe StanfordParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPParser[0m instead.   
stacklevelwarningswarnDeprecationWarningsuperr<   r5   argskwargs	__class__s      r$   r<   zStanfordParser.__init__F  sL    R		
 	
 	
 	
 	$)&)))))r'   c                 *    t          j        |          S r   )r   
fromstringr5   results     r$   rC   zStanfordParser._make_treeP  s    v&&&r'   r   r   r   r   rZ   r<   rC   __classcell__r   s   @r$   r   r     sY        0 0d N* * * * *' ' ' ' ' ' 'r'   r   c                   ,     e Zd ZdZdZ fdZd Z xZS )StanfordDependencyParsera
  
    >>> dep_parser=StanfordDependencyParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
    Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]

    >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    	conll2007c                 r    t          j        dt          d            t                      j        |i | d S )NzwThe StanfordDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   r   r   s      r$   r<   z!StanfordDependencyParser.__init__  sL    \		
 	
 	
 	
 	$)&)))))r'   c                 $    t          |d          S )Nroottop_relation_labelr
   r   s     r$   rC   z#StanfordDependencyParser._make_tree      v&AAAAr'   r   r   s   @r$   r   r   T  s`        . .` !N* * * * *B B B B B B Br'   r   c                   H     e Zd ZdZdZdZdZdZdZdZ	 fdZ
dd	Zd
 Z xZS )StanfordNeuralDependencyParserar  
    >>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP
    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
    (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
    u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
    ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
    (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
    u'punct', (u'.', u'.'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
    'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
    Tree('fox', ['over', 'the', 'lazy']), '.'])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
    ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
    conllz)edu.stanford.nlp.pipeline.StanfordCoreNLPz%stanford-corenlp-(\d+)(\.(\d+))+\.jarz,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarTc                     t          j        dt          d            t                      j        |i | | xj        dz  c_        d S )Nz}The StanfordNeuralDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   z(-annotators tokenize,ssplit,pos,depparse)r   r   r   r   r<   r3   r   s      r$   r<   z'StanfordNeuralDependencyParser.__init__  sa    \		
 	
 	
 	
 	$)&))) JJr'   Fc                      t          d          )z
        Currently unimplemented because the neural dependency parser (and
        the StanfordCoreNLP pipeline class) doesn't support passing in pre-
        tagged tokens.
        zxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.)NotImplementedError)r5   r]   r   s      r$   rg   z1StanfordNeuralDependencyParser.tagged_parse_sents  s     ":
 
 	
r'   c                 $    t          |d          S )NROOTr   r
   r   s     r$   rC   z)StanfordNeuralDependencyParser._make_tree  r   r'   r   )r   r   r   r   rZ   rY   r,   r.   r   rB   r<   rg   rC   r   r   s   @r$   r   r     s         > N=K3DHJ 	K 	K 	K 	K 	K

 

 

 

B B B B B B Br'   r   )r   r{   r   
subprocessr   nltk.internalsr   r   r   r   r   nltk.parse.apir	   nltk.parse.dependencygraphr   	nltk.treer   r-   r   r   r   r   r   r'   r$   <module>r      s   
			                      # " " " " " 6 6 6 6 6 6      Dr r r r rG r r rj@' @' @' @' @'* @' @' @'F>B >B >B >B >B4 >B >B >BB?B ?B ?B ?B ?B%: ?B ?B ?B ?B ?Br'   