
    Ng/                     :    d Z ddlZddlmZ  G d de          ZdS )z
A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61.
    N)StemmerIc                   P    e Zd ZdZdZddZddZd Zd Zd	 Z	d
 Z
d Zd Zd ZdS )LancasterStemmera/  
    Lancaster Stemmer

        >>> from nltk.stem.lancaster import LancasterStemmer
        >>> st = LancasterStemmer()
        >>> st.stem('maximum')     # Remove "-um" when word is intact
        'maxim'
        >>> st.stem('presumably')  # Don't remove "-um" when word is not intact
        'presum'
        >>> st.stem('multiply')    # No action taken if word ends with "-ply"
        'multiply'
        >>> st.stem('provision')   # Replace "-sion" with "-j" to trigger "j" set of rules
        'provid'
        >>> st.stem('owed')        # Word starting with vowel must contain at least 2 letters
        'ow'
        >>> st.stem('ear')         # ditto
        'ear'
        >>> st.stem('saying')      # Words starting with consonant must contain at least 3
        'say'
        >>> st.stem('crying')      #     letters and one of those letters must be a vowel
        'cry'
        >>> st.stem('string')      # ditto
        'string'
        >>> st.stem('meant')       # ditto
        'meant'
        >>> st.stem('cement')      # ditto
        'cem'
        >>> st_pre = LancasterStemmer(strip_prefix_flag=True)
        >>> st_pre.stem('kilometer') # Test Prefix
        'met'
        >>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t."))
        >>> st_custom.stem("ness") # Change s to t
        'nest'
    )szai*2.za*1.zbb1.zcity3s.zci2>zcn1t>zdd1.zdei3y>zdeec2ss.zdee1.zde2>zdooh4>ze1>zfeil1v.zfi2>zgni3>zgai3y.zga2>zgg1.zht*2.z	hsiug5ct.zhsi3>zi*1.zi1y>zji1d.zjuf1s.zju1d.zjo1d.zjeh1r.zjrev1t.zjsim2t.zjn1d.zj1s.zlbaifi6.zlbai4y.zlba3>zlbi3.zlib2l>zlc1.zlufi4y.zluf3>zlu2.zlai3>zlau3>zla2>zll1.zmui3.zmu*2.zmsi3>zmm1.znois4j>znoix4ct.znoi3>znai3>zna2>znee0.zne2>znn1.zpihs4>zpp1.zre2>zrae0.zra2.zro2>zru2>zrr1.zrt1>zrei3y>zsei3y>zsis2.zsi2>zssen4>zss0.zsuo3>zsu*2.zs*1>zs0.z	tacilp4y.zta2>ztnem4>ztne3>ztna3>ztpir2b.ztpro2b.ztcud1.ztpmus2.ztpec2iv.ztulo2v.ztsis0.ztsi3>ztt1.zuqi3.zugo1.zvis3j>zvie0.zvi2>zylb1>zyli3y>zylp0.zyl2>zygo1.zyhp1.zymo1.zypo1.zyti3>zyte3>zytl2.zyrtsi5.zyra3>zyro3>zyfi3.zycn2t>zyca3>zzi2>zzy1s.NFc                 B    i | _         || _        |r|n| j        | _        dS )z,Create an instance of the Lancaster stemmer.N)rule_dictionary_strip_prefixdefault_rule_tuple_rule_tuple)self
rule_tuplestrip_prefix_flags      O/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/stem/lancaster.py__init__zLancasterStemmer.__init__   s.      ".)3P::9P    c                 *   |r|n| j         }t          j        d          }i | _        |D ]i}|                    |          st          d| d          |dd         }|| j        v r!| j        |                             |           ^|g| j        |<   jdS )a(  Validate the set of rules used in this stemmer.

        If this function is called as an individual method, without using stem
        method, rule_tuple argument will be compiled into self.rule_dictionary.
        If this function is called within stem, self._rule_tuple will be used.

        z^[a-z]+\*?\d[a-z]*[>\.]?$z	The rule z is invalidr      N)r
   recompiler   match
ValueErrorappend)r   r   
valid_rulerulefirst_letters        r   
parseRuleszLancasterStemmer.parseRules   s     $.CZZ43C
Z <==
! 	< 	<D##D)) @ !>T!>!>!>???!9Lt333$\299$????6:V$\22	< 	<r   c                     |                                 }| j        r|                     |          n|}|}| j        s|                                  |                     ||          S )z(Stem a word using the Lancaster stemmer.)lowerr   _LancasterStemmer__stripPrefixr   r   _LancasterStemmer__doStemming)r   wordintact_words      r   stemzLancasterStemmer.stem   sk     zz||+/+=Gt!!$'''4  # 	OO  {333r   c                    t          j        d          }d}|r)|                     |          }|dk     s||         | j        vrd}nd}| j        ||                  D ]}|                    |          }|r|                                \  }	}
}}}t          |          }|                    |	ddd                   r{|
r@||k    r9|                     ||          r#| 	                    |||          }d}|dk    rd} n;|                     ||          r#| 	                    |||          }d}|dk    rd} n|dk    rd}|)|S )z Perform the actual word stemmingz#^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$Tr   FN.)
r   r    _LancasterStemmer__getLastLetterr   r   groupsintendswith_LancasterStemmer__isAcceptable_LancasterStemmer__applyRule)r   r    r!   r   proceedlast_letter_positionrule_was_appliedr   
rule_matchending_stringintact_flagremove_totalappend_string	cont_flags                 r   __doStemmingzLancasterStemmer.__doStemming   s    Z FGG
 5	$#'#7#7#=#=  %q((,-T5III $)  !06J1KL #& #&D!+!1!1$!7!7J! !& '--//)'()%
 (+<'8'8  ==ttt)<== &* &#';#6#64;N;N$(,<" <"#6 ,0+;+;(,lM,& ,&D 8<$4'0C'7'727$)E!%!4!4T<!H!H &'+'7'7$(,(" (" 48 0#,#3#3.3G %#u,,#Gk  5	$l r   c                     d}t          t          |                    D ]}||                                         r|} |S )zHGet the zero-based index of the last alphabetic character in this stringr$   )rangelenisalpha)r   r    last_letterpositions       r   __getLastLetterz LancasterStemmer.__getLastLetter  sL    c$ii(( 	 	HH~%%'' &r   c                     d}|d         dv rt          |          |z
  dk    rd}n/t          |          |z
  dk    r|d         dv rd}n|d         dv rd}|S )z1Determine if the word is acceptable for stemming.Fr   aeiouy   T   r   r8   )r   r    r2   word_is_acceptables       r   __isAcceptablezLancasterStemmer.__isAcceptable$  s    " 7h4yy<'1,,%)" YY%**Aw(""%)""aH$$%)"!!r   c                 L    t          |          |z
  }|d|         }|r||z  }|S )z#Apply the stemming rule to the wordr   rA   )r   r    r2   r3   new_word_lengths        r   __applyRulezLancasterStemmer.__applyRule5  s<     d))l2Ao%&  	"M!Dr   c                 l    dD ]0}|                     |          r|t          |          d         c S 1|S )zYRemove prefix from a word.

        This function originally taken from Whoosh.

        )	kilomicromilliintraultramegananopicopseudoN)
startswithr8   )r   r    prefixs      r   __stripPrefixzLancasterStemmer.__stripPrefix@  sN    

 	+ 	+F v&& +CKKMM****+r   c                     dS )Nz<LancasterStemmer> )r   s    r   __repr__zLancasterStemmer.__repr__U  s    ##r   )NF)N)__name__
__module____qualname____doc__r	   r   r   r"   r   r&   r*   r+   r   rV   rU   r   r   r   r      s        ! !HtlQ Q Q Q< < < <.4 4 4= = =~  " " ""	 	 	  *$ $ $ $ $r   r   )rZ   r   nltk.stem.apir   r   rU   r   r   <module>r\      sj     
			 " " " " " "E$ E$ E$ E$ E$x E$ E$ E$ E$ E$r   