
    Ng                        d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z"  G d d          Z#d Z$e%dk    r
 e$             dgZ&dS )zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    N)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)
ChunkScoreRegexpChunkParser)RegexpChunkRule)	conll2000treebank_chunk)ShowText)Tree)in_idlec                   z   e Zd ZdZi dddddddd	d
dddddddddddddddddddddd d!d"d#i d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQZg dRZdS edTU          fdV edWU          fdX edYZ          fd[ ed\]          fd^ ed\]          fd_ ed`d`a          fdb edcdda          fde edfU          fdg edhU          fdi edjU          fg
ZdkZ	 dlZ		 dmZ
	 dnZ	 doZ	  edpdqdrdrdkdsdtduv          Z edldldrdrdwdkdsdtdux	  	        Z edydzd{d{dkdsdtdud|}	  	        Z ed~dsdt          Z edd          Z eddtdtd          Z ed{d{dkdsdtdd          Z eddd          ZdZdrZ edrZ          Z edZ          ZdZd Z	 	 	 	 	 ddZd Zd Zd Zd ZdZ dZ!d Z"dZ#d Z$d Z%d Z&dZ'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0ddZ1d Z2d Z3d Z4d Z5ddZ6d Z7d Z8d Z9dZ:ddZ;ddZ<ddZ=d Z>ddZ?ddZ@d ZAdS )RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    CCzCoordinating conjunctionzPRP$zPossessive pronounCDzCardinal numberRBAdverbDT
DeterminerRBRzAdverb, comparativeEXzExistential thereRBSzAdverb, superlativeFWzForeign wordRPParticleJJ	AdjectiveTOtoJJRzAdjective, comparativeUHInterjectionJJSzAdjective, superlativeVBzVerb, base formLSzList item markerVBDzVerb, past tenseMDModalNNSzNoun, pluralNNzNoun, singular or massVBNzVerb, past participleVBZzVerb,3rd ps. sing. presentNNPzProper noun, singularNNPSzProper noun pluralWDTzwh-determinerPDTPredeterminerWPz
wh-pronounPOSzPossessive endingzWP$zPossessive wh-pronounPRPzPersonal pronounWRBz	wh-adverb(zopen parenthesis)zclose parenthesisz
open quotecommazclose quoteperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentcolon)z``,z''.#$INSYMVBGVBP:))Help20a-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.)Rules10a  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Strip rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
)Regexps10 60aZ  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
\t<regexp><\#><CD> # This is a comment...</regexp>\n		Matches <match>"#/# 100/CD"</match>
</hangindent>)TagsrS   zB<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
redz#a00
foregroundgreenz#080	highlightz#ddd
background	underlineT)r\   h1indent   lmargin1lmargin2
hangindentr   <   varz#88fregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efegroove   word)widthheightr[   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	ro   rp   r[   rq   rW   rr   rs   rt   ru   F   
   z#eef)   )	ro   rp   r[   rq   rr   rs   rt   ru   tabsz#9bb)r[   rs   rt   	helveticaifamilysizez#777   )r[   padxpadyrt   i,  i  )r[   rq   rr   rs   rt   ro   rp   )r[   activebackgroundrq   z#aba   c                     t          j        dd|          }t          j        dd|          }t          j        dd|          }|                                }t          j        dd|          }|S )	N((\\.|[^#])*)(#.*)?\1z + z\n\s+z\nz	([^\\])\$z\1\\$)resubstrip)selfgrammars     T/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/app/chunkparser_app.pynormalize_grammarz RegexpChunkApp.normalize_grammar4  sa    &/@@&sG,,&5'22--//&x99    r   N NPc                 t   || _         || j        }|| _        |G|dk    rt          j        d          }n,|dk    rt          j                    }nt          d|z            d| _        	 || _        	 d| _	        	 d| _
        	 || _        	 || _        	 d| _        	 d| _        	 g | _        	 d| _        	 d| _        	 d| _        	 d| _        	 t)          |          | _        	 t-                      x}| _        |                    d	           |                    d
           |                    d| j                   t9          |          | _        | j                            d           |                     |           |                      |           | !                    |           | "                    |           | j#        $                                 |r9| j#        %                    d|dz              | j#        &                    dd           | '                    d           | (                                 dS )a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txttreebankzUnknown development set %sr   chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   end
insert1.0))_chunk_labelTAGSETtagsetr   chunked_sentsr   
ValueErrorchunkerr   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxfocusr   mark_setshow_devsetupdate)r   r   r   r   r   r   r   s          r   __init__zRegexpChunkApp.__init__?  si   $ (>[F >k))"0==:%%'577 !=!KLLL7)"&7 ;A&?	-  =	8  	I "	 )-%5	5 &+>>>	: dhX		+,,,--- #3KKc""" 	3C   3  	6O""5'D.999O$$Xu555 	r   c                 D    |                     d j                   |                     d j                   |                     d j                   |                     d j                   |                     d fd           |                     d fd            j                             d j                    j                             d j                    j                             d j                    j                             d	 j                   d S )
Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                 ,                                     S N)save_grammarer   s    r   <lambda>z/RegexpChunkApp._init_bindings.<locals>.<lambda>      $*;*;*=*= r   z<Control-o>c                 ,                                     S r   )load_grammarr   s    r   r   z/RegexpChunkApp._init_bindings.<locals>.<lambda>  r   r   z<Configure>)r   _devset_next_devset_prevtoggle_show_tracer   r   evalbox
_eval_plotr   r   s   ` r   r   zRegexpChunkApp._init_bindings  s    1222 1222 6777t{+++ = = = =>>> = = = =>>>]D,BCCC]D,=>>>]D,=>>> 	-99999r   c                 @   t          |          | _        | j                            d           t          d| j                                                   | _        t          dt          | j                                        dz  dz                       | _        d S )Nr_   rz   r{      )r   _sizer   r   get_fontint
_smallfontr   s     r   r   zRegexpChunkApp._init_fonts  s    C[[

rDJNN4D4D3DEEE
s4:>>+;+;b+@B+F'G'G%H
 
 
r   c                 z   t          |          }t          |d          }|                    dd| j                   |                    ddd| j                   |                    ddd	| j                   |                    d
d| j                   |                    dd| j        d           |                    dd|           t          |d          }|                    d| j	        dd| j
                   |                    d| j	        dd| j
                   |                    d| j	        dd| j
                   |                    d| j	        dd| j
                   |                    d| j	        dd| j
                   |                    dd|           t          |d          }|                    d| j        d| j                    |                    d!| j        d"| j                    |                    d#| j        d$| j                    |                    d%| j        d&| j                    |                    d'd|           t          |d          }|                    d(d| j                   |                    d)d|           |                    |*           d S )+Nr   )tearoffzReset Application)labelr\   commandzSave Current GrammarzCtrl-s)r   r\   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   Exitrh   zCtrl-q)r   r\   r   r   File)r   r\   menuTinyrw   )r   variabler\   valuer   Small   Mediumr_   Large   Huge"   Viewz50 sentences2   )r   r   r   r   z100 sentencesr   z200 sentences   z500 sentencesi  zDevelopment-SetAboutrN   )r   )r	   add_commandresetr   r   save_historyr   add_cascadeadd_radiobuttonr   resizer   set_devset_sizeaboutconfig)r   parentmenubarfilemenuviewmenu
devsetmenuhelpmenus          r   r   zRegexpChunkApp._init_menubar  s   v,,+++#6!TZXXX( %	 	 	
 	
 	
 	  %	 	 	
 	
 	
 	(B@Q 	 	
 	
 	
 	At| 	 	
 	
 	
 	&AHEEE+++  ZK 	! 	
 	
 	
 	  ZK 	! 	
 	
 	
 	  ZK 	! 	
 	
 	
 	  ZK 	! 	
 	
 	
 	  ZK 	! 	
 	
 	
 	&AHEEE'1---
"" &(	 	# 	
 	
 	
 	""!&(	 	# 	
 	
 	
 	""!&(	 	# 	
 	
 	
 	""!&(	 	# 	
 	
 	
 	"3qzRRR+++7aLLL&AHEEE7#####r   c                 f    | j         r|                                  n|                                  dS )Nbreak)_showing_tracer   
show_tracer   r   s     r   r   z RegexpChunkApp.toggle_show_trace#  s7     	OOwr      Fc                 
   |                     d| j                                                  }|                     d| j                                                  }| j                            d           | j                            d|dz  dz
  ddd	          }| j                            |          d         d
z   |dz
  }}| j                            |||z
  dz  z   |dz
  ddd          }d| j                            |          d         dz
  }	}| j        d         }
| j                            | j        	                    dd|dz
  d|
|
                     | j                            | j        	                    d|	dz   dd|
|
                     | j
                                         rt          | j                  dk    rdx}}dx}}t          dt          t          | j                  | j        dz                       D ]U}| j        |          \  }}}}t          ||          }t          ||          }t!          ||          }t!          ||          }Vt!          |dz
  d          }t!          |dz
  d          }t          |dz   d          }t          |dz   d          }ndx}}dx}}t          d          D ]}|||z
  |dz  |z
  ||z
  z  z  z   }|	|	|z
  |dz  |z
  ||z
  z  z  z
  }||cxk     r|k     r"n n| j                            ||||	d           ||cxk     r|	k     r"n o| j                            ||||d           | j                            ||||	           | j                            ||	||	           | j                            |dz
  |	dddd|z  z  	           | j                            |dz
  |dddd|z  z  	           | j                            ||	dz   dddd|z  z  	           | j                            ||	dz   dddd|z  z  	           d x}}t%          | j                  D ]'\  }\  }}}}|||z
  ||z
  ||z
  z  z  z   }|	|	|z
  ||z
  ||z
  z  z  z
  }|| j        k    rO| j                            |dz
  |dz
  |dz   |dz   d d!           d"|dz  z  d#|dz  z  z   d$|dz  z  z   | j        d%<   nD| j                            | j                            |dz
  |dz
  |dz   |dz   d&d'                     |P| j                                         r7| j                            | j                            ||||d'                     ||}})d S )(Nro   rp   allrw   rm   leftw	Precision)justifyanchortextr   sRecallcenter)r  r  r  rh   r[   r   i  )filloutlineg{Gz?   g      $@z#888)r  r~   rightsez%d%%r   nenwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r  #afaz#8c8)r   r   winfo_widthwinfo_heightdeletecreate_textbbox_EVALBOX_PARAMSlowercreate_rectangle
_autoscalelenr   rangemin_SCALE_Nmaxcreate_line	enumerater   create_ovalstatus_eval_lines)r   r   r   ro   rp   tagr  r  r   botbgmax_precision
max_recallmin_precision
min_recallir   	precisionrecallfmeasurexyprev_xprev_y_fscores                             r   r   zRegexpChunkApp._eval_plot-  s   

7DL$<$<$>$>??Hdl&?&?&A&ABB 	E""" l&&!b &; ' 
 
 l'',,Q/!3URZel&&EDLQ&&RK ' 
 
 t|((--a025S !,/L))!Qq$RQS)TT	
 	
 	
 	L))!S1WdDrSU)VV	
 	
 	

 ?   	+S%7%7!%;%;)**MJ)**MJ1c#dm"4"4dma6GHHII 5 57;}aR7H4FH #I} = = 44
 #I} = = 44

   4a88MZ$.22J 4a88MZ$.22JJ)**MJ)**MJ r 
	I 
	IATJ&:
+BC A sSyTM)mm.KL A a%((CCf(EEEQ}}}}}}}}}((q%(HHH  sD#666  sE3777 	  1H3./ 	! 	
 	
 	
 	  1H3./ 	! 	
 	
 	
 	  !G3+, 	! 	
 	
 	
 	  !G3+, 	! 	
 	
 	
 1:4=1I1I 	" 	"-A-9ff*$j)@A A sSy]*}}/LM A D'''((E1q5!a%QVV )    *Y_=(FSL9:'6C<89 F## ""L,,Aq1ua!eQU -    
 !d&6&:&:&<&<!""L,,VVQ,OO   FFF5	" 	"r   c                 4   | j         d S | j        	d| _        d S t          j                    }t          j                    | j        z
  | j        k     rL| j        | j        k    r<d| _        | j                             t          | j
        dz            | j                  S | j        | j        k    r| j        D ]\  }}}}| j        |                     |          k    r_| j                            ||||f           t          | j                  dz
  | _        |                                  d| _        d | _         d S d| _        t'          | j                  | _        | j        | _        | j        | _        | j                                        dk    r	d| _        d S | j        | j        t5          | j        | j        z   | j                                                           D ]D}|                     |                                          }| j                             ||           E| xj        | j        z  c_        | j        | j                                        k    r| j                            | j        | j        !                                | j        "                                | j        #                                f           t          | j                  dz
  | _        |                                  d| _        d | _        d S d| j        z  | j                                        z  }d	|z  | j$        d
<   d| _        | %                    t          j                    |z
             | j                             t          | j
        dz            | j                   d S )NFTi  rh   r   r   r   r   z$Evaluating on Development Set (%d%%)r  )&r   r   _eval_demon_runningtimer   _EVAL_DELAYr   r   afterr   
_EVAL_FREQ_eval_demonr   r   appendr  r   r   r   r   r   r   r   r   r   r   r  _EVAL_CHUNKr   r   _chunkparseleavesscorer/  r0  	f_measurer%  _adaptively_modify_eval_chunk)	r   t0gprfgoldguessprogresss	            r   r>  zRegexpChunkApp._eval_demon  sz   8F<',D$F Y[[ IKK$--0@@@'4+HHH'+D$8>>#do&<"="=t?OPPP "d&CCC #m  
1a*d.D.DQ.G.GGGM((!Q1666*-dm*<*<q*@D'OO%%%/4D,48D1FF H  !D)d6GHHHD!%D,0,CD) "((**b00',D$F Ks 4#33T5F5J5J5L5L    
 	0 	0D
 $$T[[]]33E""4//// 	D,, t0446666M  &$..00$++--$..00	   #&dm"4"4q"8DOO',D$,0D)))T--0A0E0E0G0GGH"H8"SDK'+D$..ty{{R/?@@@HNN3t5668HIIIIIr   c           
         || j         k    r]| j        dk    rRt          | j        dz
  t          t	          | j        | j         |z  z            | j        dz
                      | _        dS || j        k     rRt          | j        dz   t          t	          | j        | j        |z  z            | j        dz                       | _        dS dS )z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   rh   rw   N)_EVAL_DEMON_MAXr@  r  r!  r   _EVAL_DEMON_MIN)r   ts     r   rE  z,RegexpChunkApp._adaptively_modify_eval_chunk  s     t###(81(<(<" 1$(D,@1,DEFF$r)    D %%%" 1$(D,@1,DEFF$r)    D &%r   c                     t          |fi  j        }|                    dd           |                    dd           |                    dd           |                    dd           t	          |fd j        i j         _        t          | j        d	d
 j        d                    _	         j	        
                    ddd            j        
                    ddd           t          | j        j                  }|
                    ddd            j                            |j                    j        d         }t          ||          }|
                    ddd           t          |fd j        d j                            d           t          |fd j        d j                            d           t	          |fd j        i j         _         j        
                    ddd           i  _         j        d         }t          ||          }|
                    ddd           t1           j                  D ]\  }\  }}	}
t          || j                  }|
                    |dz  dd           |                    d|f fd	           | j        |<   t          |d j        |          
                    |dz  dz   d            j         j        d         d                                       j                     j                            d!d"#            j        D ]\  }}  j        j        d$|z  fi |                       j        d         d                    t          | j        j                  } j                            |j                   |
                    ddd           t          | j        d                   }t	          |fd j        i j          _!         j!                            d"d%&           t          | j        d'd( j         d         )           _"         j"        
                    ddd           |
                    ddd           t          | j#                   _$         j$        
                    ddd           t          | j!        j%        d*+           _&         j&        j         j!        d,<    j&                            d-d./            j        d         }t          ||          }|
                    dd0d           t          |fd1 j'        d j                            d           t          |fd2 j(        d j                            d           t          |fd3 j)        d4d5 j         _*         j*                            d(           t          |fd6 j+        d j         _,         j,                            d(           t[          |fi  j.         _/        t          | j        d7d( j.        d         )          }|
                    ddd            j/        
                    dddd8            j        d         }t          ||          }|
                    dd0d           ta           j1                   _2         j2                            d9           tg          |f j2         j4        d:d; j                            d           ta           j1                   _5         j5                            d9           tg          |f j5         j4        d<d; j                            d           t          |fd=d>i j                            d(           t          |fd j        i j6         _7         j7        
                    dd?d@dddA           d4 j        dB<   d4 j!        dB<    j        d         }t          |dCd|          
                    dd           t          |ddC|          
                    dd           t          |dDd|          
                    ddE           |                    d%d"F            j!                            dGdHdIJ            j!                            dKdIdLM            j!                            dNdO            j!                            dPdQdRS            j!                            dTdUdRV            j!                            dWdLX            j                            dWdY            j                            dZd[X            j                            d\d]X            j                            d^d_X            j                            d`ddab           d S )cNr      )weightr~   rm   rh   r   fontGrammar:blackr[   )rU  r  highlightcolorr[   SW)columnrowstickyNEWS)r   NWS)yscrollcommandrZ   EWzPrev Grammar)r  r   r  )sidezNext Grammar)r  rU  Sz<ButtonPress>c                 .                         |          S r   )	show_help)r   tabr   s     r   r   z.RegexpChunkApp._init_widgets.<locals>.<lambda>N  s    4>>#;N;N r   )rp   ro   r[   )rZ  r[  )rU  elideT)rf  tag-%sboth)expandr  zDevelopment Set:r  )rU  r  r  r[   horiz)r   orientxscrollcommandbottomr2  )ra  r     zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r  r   statez
Show tracezEvaluation:)rZ  r[  r\  
columnspanFZoom)r   r   r  Linesr  History	   NEW)rZ  r[  r\  r   r   rq  rp  rw   r      )r  ri  true-posr  True)r[   r\   	false-negz#800)r\   rW   	false-posz#faatracez#666none)rW   ru   
wrapindentrx   )rb   ru   errorrV   z#feccommentz#840anglez#00fbracez#0a0rc   rj   r`   )8r   _FRAME_PARAMSgrid_columnconfiguregrid_rowconfigurer   r   _GRAMMARBOX_PARAMSr   r   grammarlabelgridr
   yviewr   r   r   _history_prev_BUTTON_PARAMSpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr#  HELPr   _HELPTAB_SPACER	configure
tag_configHELP_AUTOTAGrd  _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollxviewdevset_xscrollr   r   r   devset_buttonr   trace_buttonr   r  r   r   r   r  r   r   r&  _STATUS_PARAMSr%  )r   r   frame0grammar_scrollbarr)  frame3helptab_framer.  re  tabstopsr  r   r'  paramshelp_scrollbarframe4frame1frame2s   `                 r   r   zRegexpChunkApp._init_widgets  s   s11d011##Aa#000##Aa#000  1 ---  1 --- vRRDJR$:QRR!".|<
 
 
 	aQt<<<A1V<<< &fdo6KLLLaQu===.?.CDDD -v"---1!D111	
&	
 	
 !		
 	

 $F$


	
&	
 	
 !		
 	

 $F$


 FQQQD<PQQ&999-f444!4888(1$)(<(< 		, 		,$A$Xt-cHHHEJJa!e3J777 JJc(N(N(N(N(NOOO!&DM#at/CPR  d!a%!)d++++dil1o&00dj0AAAt444, 	> 	>KC#DL#HsN==f====ty|A''' #64<3EFFF>+=>>>1!E::: v$*<\*JKKKfPP4:P9OPP4f555 #-l;
 
 
 	QAd;;;1!F333 'vt7JKKKqa>>>'DN0
 
 
 ,0+>+B'(  hS 999 -v"---1!D111	
(%	
 	
 !		
 	

 $F$


	
(%	
 	
 !		
 	

 $F$


#
$	
 

 !
 
 	W---"
%t
 
BFBU
 
 	G,,, f==(<==+L9
 
 
 	

!4
000&QGGG -v"---1!D111 **E"""	
_O		
 	

 !	
 	
 $F$


!$(++U###	
%O		
 	

 !	
 	
 $F$


v==I=)<==BBBPPP FKKKt7JKKqQQSTUUU !+W",w -fRqR888==QA=NNNfQbR888==QA=NNNfQaB777<<A1<MMM 	--- 	!!*6!RRR!!+F!SSS!!+&!AAA!!'f6!JJJ!!,&!III!!'f!=== 	""7v">>>""9"@@@""7v">>>""7v">>>""<!b"IIIIIr   c                 L   d| _         d| j        d<   d| j        d<   d| j        d<   | j                            dd           d| j        dz   | j                                        fz  | j        d	<   | j	        9| j        
                    dd
           | j                            ddd           d S | j        | j                 }| j	                                        }d}dgt          |                                          D ]2\  }\  }}|d|z  z  }                    t#          |                     3fdt%          t#          |          dz             D             | _        d t%          t#          |          dz             D             | _        t%          t#          |          dz             D ]}|dk    r8| j        
                    dd           | j                            ddd           nC| j        
                    dd||dz
           z             | j                            ddd           | j        
                    d|dz              | j                            ddd           t+          |d |                   }	|                     |                                          }
|                     |          }|                     |
          }|                    |          D ]}|                     ||d           ||z
  D ]}|                     ||d           ||z
  D ]}|                     ||d           | j        
                    dd           | j                            ddd           | j                            d| j        j        dd           d S )NTro  rp  normalr   r   Development Set (%d/%d)rh   r  z#Trace: waiting for a valid grammar.r  	z%s c                 d    i | ],}t          t                              D ]}||f|         -S  )r  r  ).0r.  jcharnums      r   
<dictcomp>z-RegexpChunkApp.show_trace.<locals>.<dictcomp>  sX     
 
 
3w<<((
 
  FGAJ
 
 
 
r   c                      i | ]}||d z  d z   S )rm   r  )r  r.  s     r   r  z-RegexpChunkApp.show_trace.<locals>.<dictcomp>  s"    DDD1q519DDDr   r   zStart:
r|  zend -2c linestartzend -2cz
Apply %s:
r   r~  rx  rz  r{  z
Finished.
r   g333333?)r   r  r  r  r  r   r   r   r  r   r   tag_addr   rulesr#  rB  r?  r  r  r  linenumr   rA  _chunksintersection_color_chunkr   r<  r  r   )r   r   	gold_treer  tagseqwordnumrn   posr.  r   	test_treegold_chunkstest_chunkschunkr  s                 @r   r   zRegexpChunkApp.show_trace  s   "%/'"&.7#"*weU+++#<"D$5$9$9$;$;<$
  <N!!%)NOOON""7E5999FK 12	""$$ #$-i.>.>.@.@$A$A 	( 	( G[dCeck!FNN3v;;''''
 
 
 
3u::>**
 
 

 EDeCJJN.C.CDDDs5zzA~&& 	9 	9AAvv%%eZ888&&w0CYOOOO%%e]U1q5\-IJJJ&&w0CYOOON!!%$777N""<1DiPPP'bqb	22G(()9)9););<<I,,y11K,,y11K$11+>> 8 8!!!UJ7777${2 9 9!!!UK8888${2 9 9!!!UK88889e]333w(;YGGG
 	sD/3Q<<<<<r   c                 d   d| j         d<   | j                             dd           | j        D ]\  }}}||k    r|                    dd                    d t          t          | j                                                  d 	          D                                 } | j	        |         j
        di | j         | j         
                    |
           | j                             d|dz              d}| j        D ]\  }}d| d| d}t          j        ||          D ]}	| j                             d||	                    d          z  ||	                    d          z             | j                             d|z  ||	                    d          z  ||	                    d          z             | j                             d||	                    d          z  ||	                    d          z             ސ | j	        |         j
        di | j         d| j         d<   d S )Nr  rp  r   r   z
<<TAGSET>>r   c              3       K   | ]	}d |z  V  
dS )z	%s	%sNr  )r  items     r   	<genexpr>z+RegexpChunkApp.show_help.<locals>.<genexpr>  s;          #T)     r   c                 `    t          j        d| d                   r
d| d         fp	d| d         fS )Nz\w+r   rh   )r   rg   )t_ws    r   r   z*RegexpChunkApp.show_help.<locals>.<lambda>"  s9    BHVSV,D,D -,!"CF-+ !3q6{ r   )key)ry   z



















z1.0 + %d charsz(?s)(<z
>)(.*?)(</z>)rf  rh   rg  rm   r~   ro  r  )r  r  r  replacejoinsortedlistr   itemsr  r   _HELPTAB_FG_PARAMSr   r  r   finditerr  startr   _HELPTAB_BG_PARAMS)
r   re  namer  r  Cr'  r  patternms
             r   rd  zRegexpChunkApp.show_help  se    (WE5)))$(I 	F 	F D(Ds{{|| II  $* !2!2!4!455!+ !+% % %      +d#*EET-DEEE###222##E4)+;<<<$#'#4 T TKC=s==c===G[$77 T T,,Wa!''!**na!%%PQ((lSSS,,$sNA

NAaL   ,,Wa!''!**na!%%PQ((lSSSSTT +d#*EET-DEEEE *Wr   c                 @    |                      | j        dz
             dS Nrh   r   _view_historyr   r   s     r   r  zRegexpChunkApp._history_prev9  #    4.2333wr   c                 @    |                      | j        dz              dS r  r  r   s     r   r  zRegexpChunkApp._history_next=  r  r   c                    t          dt          t          | j                  dz
  |                    }| j        sd S || j        k    rd S d| j        d<   | j                            dd           | j                            d| j        |         d                    | j                            dd           || _        | 	                    | j        |         d                    | 
                    | j        |         d                   | _        | j        r%d | j                            d	          D             }ng }t          |          | _        |                                  |                                  | j        r|                                  | j        t          | j                  dz
  k     r:d
                    | j        dz   t          | j                            | j        d<   d S d| j        d<   d S )Nr   rh   r  rp  r   r   r   c                 6    g | ]}t          j        |          S r  r   
fromstringr  lines     r   
<listcomp>z0RegexpChunkApp._view_history.<locals>.<listcomp>T  s3         *400  r   r   zGrammar {}/{}:r  rV  )r!  r  r  r   r   r   r  r   r   _syntax_highlight_grammarr   r   splitr   r   r   _highlight_devsetr   r   formatr  )r   indexr  s      r   r  zRegexpChunkApp._view_historyA  s   As3t}--1599::} 	FD'''F $, ue,,,udmE&:1&=>>>  5111#&&t}U';A'>???"&"8"8u9Ma9P"Q"Q" 	  399$??  EE
 E(//    	OOT]!3!3a!777(8(?(?#a'DM"") )Df%%%
 )3Df%%%r   c                 4    |                      ddd           dS )Nscrollrh   pager   r  r   s     r   r   zRegexpChunkApp._devset_nextj  s    Ha000wr   c                 4    |                      ddd           dS )Nr  r   r  r   r  r   s     r   r   zRegexpChunkApp._devset_prevn  s    Hb&111wr   c                 X    | j         d S | j                                          d | _         d S r   )r   r   r   s     r   r   zRegexpChunkApp.destroyr  s.    8Fr   c                 b   d}| j         }|dk    rL|d                             d          r1|                     | j        t	          |d                   z              n|dk    rO|d                             d          r4|                     | j        |t	          |d                   z  z              nf|dk    rP|                     t	          t          |d                   | j                                        z                       nJ d| d|             |r|                                  d S d S )	Nrh   r  unitr   r  movetozbad scroll command r   )	r   
startswithr   r   r   floatr   r   r   )r   r   argsNshowing_traces        r   r  zRegexpChunkApp._devset_scrollx  s9   +h47#5#5f#=#=T.T!W=>>>>  T!W%7%7%?%? T.Sa\\1AABBBB  StAw$2C2G2G2I2I!IJJKKKK<<G<<d<<<<< 	OO	 	r   c                 ^   || j         }t          t          d|          | j                                        dz
            }|| j         k    r	| j        sd S || _         d| _        d| j        d<   d| j        d<   d| j        d<   d| j        d<   | j        	                    d	d
           d| j         dz   | j                                        fz  | j
        d<   | j        | j         | j         dz            }i | _        ddi| _        t          |          D ]\  }}d}t          |                                          D ]H\  }\  }}t!          |          | j        ||f<   || d| dz  }t!          |          | j        ||dz   f<   I| j                            d
|d d         dz              | j        |                                  d| j        d<   | j         | j                                        z  }	| j         dz   | j                                        z  }
| j                            |	|
           d S )Nr   rh   Fr  rp  ro  rn   ru   r   r   r  r  r   /r   r   z

rm   )r   r  r!  r   r   r   r  r  r  r  r  r   r  r  r#  rB  r  r   r   r  r  r   )r   r  samplesentnumsentlinestrr  rn   r  firstlasts              r   r   zRegexpChunkApp.show_devset  s^   =%E C5MM4#4#8#8#:#:Q#>??D%%%d.A%F!#%-'"&07# #+w!'veU+++#<"D$5$9$9$;$;<$
 
 T.1BQ1FFG1v&v.. 	@ 	@MGTG(1$++--(@(@ B B$$14WWg-.d++S++++58\\Wgk122N!!%")>???? <#""$$$",w !D$5$9$9$;$;;!A%):)>)>)@)@@ud+++++r   c                    t                      }d}|D ]s}t          |t                    rW|                                | j        k    r'|                    ||t          |          z   f           |t          |          z  }n|dz  }t|S )Nr   rh   )r   
isinstancer   r   r   addr  )r   treechunksr  childs        r   r  zRegexpChunkApp._chunks  s     	 	E%&& ;;==D$555JJ3u::)=>???3u::%1r   c                    | j         d S | j                            ddd           | j                            ddd           | j                            ddd           | j                            ddd           t	          |                    d                    D ]m\  }}|                                st          j        d|          }d }|	                    d	          rk|
                    d	          }d
|dz   |
                    d	          fz  }d
|dz   |                    d	          fz  }| j                            d||           t          j        d|          D ]}||
                                |k    r nd
|dz   |
                                fz  }d
|dz   |                                fz  }|	                                dv r| j                            d||           | j                            d||           od S )Nr  r   r   r  r  rc   r   z(\\.|[^#])*(#.*)?rm   z%d.%drh   z[<>{}]z<>)r   r   
tag_remover  r#  r  r   r   rg   groupr  r   r  )r   r   linenor  r  comment_startr	  r   s           r   r  z(RegexpChunkApp._syntax_highlight_grammar  s   8F""9eU;;;""7E5999""7E5999eU;;;%gmmD&9&9:: 	; 	;LFD::<< -t44A Mwwqzz 9 !

vz1771::66vz1558844''	1a888[400 ; ; ,m1K1KEvz1779955vz15577337799$$O++GQ::::O++GQ::::%	; 	;r   c           	         | j         d S | j                            ddd           g | _        t	          |                    d                    D ]\  }}t          j        dd|          }|                                }|rU	 t          j
        |           G# t          $ r2}| j                            dd|dz   z  d	|dz   z             Y d }~~d }~ww xY wd
| j        d<   d S )Nr  r   r   r   r   r   z%s.0rh   z%s.0 lineendr   r  )r   r   r  _grammarcheck_errsr#  r  r   r   r   r   r  r   r  r%  )r   r   r  r  r   s        r   _grammarcheckzRegexpChunkApp._grammarcheck  s   8F""7E5999"$%gmmD&9&9:: 		 		LFD60%>>D::<<D #.t4444!   O++6A:!6&ST*8U        !Fs   B
C (CCc                 ,   |rt          j                     | _        | j                            dd          x| _        }|                     |          }|| j        k    rd S || _        | j        t          | j	                  dz
  k     r
d| j
        d<   |                     |           	 |r d |                    d          D             }ng }n4# t          $ r'}|                     |           d | _        Y d }~d S d }~ww xY wt!          |          | _        | j                            ddd           t          j                     | _        | j        r|                                  n|                                  | j        s|                                  d S d S )	Nr   r   rh   rV  r  c                 6    g | ]}t          j        |          S r  r  r  s     r   r  z)RegexpChunkApp.update.<locals>.<listcomp>  s3        $.t44  r   r   r  )r:  r   r   r   r   r   r   r   r  r   r  r  r  r   r  r   r   r  r   r   r   r  r9  r>  )r   eventr   r   r  r   s         r   r   zRegexpChunkApp.update  s    	."&)++D "&!4!4UE!B!BBw "33G<<!888F&8D# T]!3!3a!777(2Df%&&w///	!   2 8 8 > >  
  	 	 	w'''DLFFFFF		 )//""7E5999#y{{ 	%OO""$$$' 		 	s   &$C 
C<C77C<c                    || j         | j        | j        dz            }| j                            ddd           | j                            ddd           | j                            ddd           t	          |          D ]\  }}|                     |                                          }|                     |          }|                     |          }|                    |          D ]}| 	                    ||d           ||z
  D ]}| 	                    ||d           ||z
  D ]}| 	                    ||d           d S )Nrh   rx  r   r   rz  r{  )
r   r   r  r  r#  rA  rB  r  r  r  )r   r  r  r  r  r  r  r  s           r   r  z RegexpChunkApp._highlight_devset  su   >[!2T5F5J!JKF!!*eU;;;!!+ue<<<!!+ue<<< #,F"3"3 	? 	?GY(()9)9););<<I,,y11K,,y11K$11+>> > >!!'5*====${2 ? ?!!'5+>>>>${2 ? ?!!'5+>>>>?	? 	?r   c                     	 | j                             |          S # t          t          f$ r(}| j                            ddd           |cY d }~S d }~ww xY w)Nr  r   r   )r   parser   
IndexErrorr   r  )r   wordsr   s      r   rA  zRegexpChunkApp._chunkparse5  sl    	<%%e,,,J' 	 	 	 O##GUE:::LLLLLL	s    AA
AAc           	          |\  }}| j                             || j        |          d| j        ||f          | j        |          d| j        ||f         dz
              d S )NrF   rh   )r  r  r  r  )r   r  r  r'  r  r   s         r   r  zRegexpChunkApp._color_chunk@  s}    
s|G$EEt|GUN'CEE|G$GGt|GSL'AA'EGG	
 	
 	
 	
 	
r   c                     d | _         d | _        d | _        d| _        g | _        d| _        | j                            dd           |                     d           | 	                                 d S )Nr   r   r   )
r   r   r   r   r   r   r   r  r   r   )r   s    r   r   zRegexpChunkApp.resetH  sl    "& ue,,,r   z# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c                 $   |sddg}t          |d          }|sd S | j        rS| j        |                     | j        d         d                   k    r$d | j        d         dd          D             \  }}}n| j        d	x}x}}nd
x}x}}t          |d          5 }|                    | j        t          t          j
                    | j        |||| j                                                  z             d d d            d S # 1 swxY w Y   d S )NzChunk Gramamr.chunkz	All files*r  	filetypesdefaultextensionr   r   c              3   &   K   | ]}d d|z  z  V  dS )z%.2f%%r   Nr  )r  vs     r   r  z.RegexpChunkApp.save_grammar.<locals>.<genexpr>j  s<       ) ))*C!G$) ) ) ) ) )r   rh   zGrammar not well formedzNot finished evaluation yetr  )dater   r/  r0  r7  r   )r   r   r   r   r   openwriteSAVE_GRAMMAR_TEMPLATEdictr:  ctimer   r   r   )r   filenameftypesr/  r0  r7  outfiles          r   r   zRegexpChunkApp.save_grammara  s    	13EFF(6HUUUH = 		HT48N8NM"a 9
 9
 
 
) ).2mB.?.C) ) )%Ivvv \!*CCIC*GGIG(C   	GMM*+'!! L..00  
 
 
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   ADD	D	c                    |sddg}t          |d          }|sd S | j                            dd           |                                  t	          |          5 }|                                }d d d            n# 1 swxY w Y   t          j        dd|                                          }| j        	                    d|           |                                  d S )	Nr  r  r  r  r   r   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
r   )
r   r   r  r   r"  readr   r   lstripr   )r   r'  r(  infiler   s        r   r   zRegexpChunkApp.load_grammar  s    	13EFF&(SSSH ue,,,(^^ 	$vkkmmG	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$&Er7
 

&(( 	 	ug...s   A;;A?A?c           	      4   |sddg}t          |d          }|sd S t          |d          5 }|                    d           |                    dt          j                    z             |                    d| j        z             t          | j                  D ]\  }\  }}}}d	|d
z   t          | j                  |dz  |dz  |dz  fz  }	|                    d|	z             |                    d	                    d |
                                                                D                                  | j        r/| j        |                     | j        d         d                   k    s| j        |                    d           n|                    d           |                    d	                    d | j        
                                                                D                                  d d d            d S # 1 swxY w Y   d S )N)zChunk Gramamr History.txtr  r/  r  r  z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)rh   r   z
%s
r   c              3       K   | ]	}d |z  V  
dS z  %s
Nr  r  s     r   r  z.RegexpChunkApp.save_history.<locals>.<genexpr>  s&      %T%T$ho%T%T%T%T%T%Tr   r   r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c              3       K   | ]	}d |z  V  
dS r1  r  r  s     r   r  z.RegexpChunkApp.save_history.<locals>.<genexpr>  s&      UUHtOUUUUUUr   )r   r"  r#  r:  r&  r   r#  r   r  r  r   r  r   r   r   r   )
r   r'  r(  r)  r.  rG  rH  rI  rJ  hdrs
             r   r   zRegexpChunkApp.save_history  se    	79KLF(6FSSSH (C   	GMMDEEEMM.4:<<7888MM3d6FFGGG#,T]#;#; V V<Aq!Q%1uc$-00!c'1s7AGLM 
 hn---bgg%T%T!''))//BSBS%T%T%TTTUUUU +))$-*;A*>??@ @ <'MM"IJJJJMM"GHHHGGUU8J8J8L8L8R8R8T8TUUUUU  -	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   GHHHc                     d}d}	 ddl m}  |||                                           d S #  t          | j        ||           Y d S xY w)Nz<NLTK RegExp Chunk Parser Application
Written by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messager   )tkinter.messageboxr5  showr   r   )r   r   ABOUTTITLEr5  s        r   r   zRegexpChunkApp.about  sp    TD	-222222GE///4466666	-TXue,,,,,,s	   %- Ac                 4   || j                             |           | j                             t          t          | j                  | j                                                              |                     d           |                     d           d S )Nrh   r   )r   r   r  r  r   r   r   r   r}   s     r   r   zRegexpChunkApp.set_devset_size  s    !!$'''c#dk"2"2D4E4I4I4K4KLLMMMr   c                 <   || j                             |           | j                                         }| j                            t          |                      | j                            t          dt          |           dz  dz                       d S )N)r}   ir   r_   )r   r   r   r   r  absr   r  r<  s     r   r   zRegexpChunkApp.resize  s    JNN4   z~~
CII,///!!s3#d))r0AR0G'H'H!IIIIIr   c                 J    t                      rdS  | j        j        |i | dS )z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   r   mainloop)r   r  kwargss      r   r@  zRegexpChunkApp.mainloop  s5     99 	F4*6*****r   )r   Nr   r   Nr   )B__name__
__module____qualname____doc__r   r  r%  r  r;  r@  r=  rP  rO  r  r  r  r  _FONT_PARAMSr  r  r  _HELPTAB_BG_COLOR_HELPTAB_FG_COLORr  r  r  r   r   r   r   r   r   r   _DRAW_LINESr   r9  r>  rE  r   r   r   rd  r  r  r  r   r   r   r  r   r  r  r  r   r  rA  r  r   r$  r   r   r   r   r   r   r@  r  r   r   r   r   -   s,        .(.$. 	. 	h	.
 	l. 	$. 	!. 	$. 	n. 	j. 	k. 	d. 	'. 	n. 	'.  	!." 	 #. .$ 	!%.& 	g'.( 	~).* 	&+., 	&-.. 	+/.0 	&1.2 	$3.4 	5.6 	7.8 	l9.: 	";.< 	&=.> 	!?.@ 	{A.B 	C.D 	 E. .F +,/400[. . .Fne e eDP 
'''(	$$&)))*	ddf---.	ddT***+	ttd###$	44b1112	ttQ4445	'''(	446***+	$$&)))*L" K+ K#JCO O "	 	 	 d"
 
 
O "
 
 
 TVHQGGGN4{555LDF1EEEMd"  O TF  N 000000O	 	 	  i i i iV: : :
 
 
`$ `$ `$D   HKE" E" E"N  IJ IJ IJV  0{J {J {Jz N== == ==~ +  +  +D    '3 '3 '3R        +, +, +, +,Z
 
 
; ; ;6! ! !"1 1 1f? ? ? ?.	 	 	
 
 
  	    <       B- - -   J J J J	+ 	+ 	+ 	+ 	+r   r   c                  F    t                                                       d S r   )r   r@  r  r   r   apprK    s     r   __main__rK  )'rE  randomr   textwrapr:  tkinterr   r   r   r   r   r   r	   r
   r   r   tkinter.filedialogr   r   tkinter.fontr   
nltk.chunkr   r   nltk.chunk.regexpr   nltk.corpusr   r   nltk.draw.utilr   	nltk.treer   	nltk.utilr   r   rK  rB  __all__r  r   r   <module>rY     s     				                          B A A A A A A A       4 4 4 4 4 4 4 4 - - - - - - 1 1 1 1 1 1 1 1 # # # # # #            e+ e+ e+ e+ e+ e+ e+ e+P-      zCEEE'r   