
    Ng.                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ  G d de j	                  Z
 G d d	e j	                  ZdS )
    N)closing)data)PorterStemmer)SnowballStemmerc                   ,    e Zd Zd Zd Zd Zd Zd ZdS )SnowballTestc                    t          dd          }|                    d          dk    sJ |                    d          dk    sJ |                    d          dk    sJ |                    d          d	k    sJ |                    d
          d	k    sJ |                    d          d	k    sJ |                    d          d	k    sJ |                    d          dk    sJ |                    d          dk    sJ t          dd          }|                    d          dk    sJ |                    d          d	k    sJ |                    d          dk    sJ t          d          }|                    d          dk    sJ |                    d          dk    sJ |                    d          dk    sJ |                    d          d	k    sJ |                    d          dk    sJ dS )z
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        arabicTu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منFu   اللذu   الكلماتu   كلمNr   stem)self
ar_stemmers     T/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/test/unit/test_stem.pytest_arabiczSnowballTest.test_arabic
   s.    %Xt44
GHHHTTTT/00H<<<<~..(::::122j@@@@344
BBBB344
BBBB122j@@@@~...@@@@v&&&0000$Xu55
~..*<<<<122j@@@@/00H<<<<$X..
GHHHTTTT/00H<<<<~..(::::122j@@@@/00H<<<<<<    c                 Z    t          d          }|                    d          dk    sJ d S )Nrussianu   авантненькаяu   авантненькr   )r   stemmer_russians     r   test_russianzSnowballTest.test_russian'   s6    ))44##$>??CYYYYYYYr   c                    t          d          }t          dd          }|                    d          dk    sJ |                    d          dk    sJ |                    d          dk    sJ |                    d          dk    sJ d S )NgermanT)ignore_stopwordsu	   Schränkeschrankkeinenkeinr   )r   stemmer_germanstemmer_german2s      r   test_germanzSnowballTest.test_german+   s    (22)(TJJJ""=11Y>>>>##M22i????""8,,6666##H--999999r   c                     t          d          }|                    d          dk    sJ |                    d          dk    sJ d S )Nspanish	Visionadovisionalguealgur   r   stemmers     r   test_spanishzSnowballTest.test_spanish5   sQ    !),,||K((H4444 ||G$$......r   c                 Z    t          d          }|                    d          dk    sJ d S )Nenglishzy'syr   r%   s     r   test_short_strings_bugz#SnowballTest.test_short_strings_bug=   s2    !),,||E""c))))))r   N)__name__
__module____qualname__r   r   r   r'   r+    r   r   r   r   	   sb        = = =:Z Z Z: : :/ / /* * * * *r   r   c                   8    e Zd Zd Zd Zd Zd Zd Zd Zd Z	dS )	
PorterTestc                     t          t          j        d                              d                    5 }|                                                                cd d d            S # 1 swxY w Y   d S )Nz*stemmers/porter_test/porter_vocabulary.txtutf-8encoding)r   r   findopenread
splitlinesr   fps     r   _vocabularyzPorterTest._vocabularyC   s    IBCCHH  I  
 
 	* 7799''))	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	*s   &A))A-0A-c                     t          |          }t          |                                 |          D ]@\  }}|                    |          }||k    s J d                    ||||                      Ad S )N)modez*{} should stem to {} in {} mode but got {})r   zipr<   r   format)r   stemmer_modeexpected_stemsr&   word	true_stemour_stems          r   _test_against_expected_outputz(PorterTest._test_against_expected_outputK   s    \222"4#3#3#5#5~FF 		 		OD)||D))HI%%%;BB	  &%%%		 		r   c                 *   t          t          j        d                              d                    5 }|                     t
          j        |                                                                           ddd           dS # 1 swxY w Y   dS )az  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from
        https://tartarus.org/martin/PorterStemmer/voc.txt and
        https://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at https://tartarus.org/martin/PorterStemmer/
        z-stemmers/porter_test/porter_martin_output.txtr3   r4   N)	r   r   r6   r7   rF   r   MARTIN_EXTENSIONSr8   r9   r:   s     r   test_vocabulary_martin_modez&PorterTest.test_vocabulary_martin_modeX   s     IEFFKK  L  
 
 	 ../1E1E1G1G  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	   ABBBc                 *   t          t          j        d                              d                    5 }|                     t
          j        |                                                                           d d d            d S # 1 swxY w Y   d S )Nz+stemmers/porter_test/porter_nltk_output.txtr3   r4   )	r   r   r6   r7   rF   r   NLTK_EXTENSIONSr8   r9   r:   s     r   test_vocabulary_nltk_modez$PorterTest.test_vocabulary_nltk_modej   s    ICDDII  J  
 
 	 ..-rwwyy/C/C/E/E  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	rJ   c                    t          t          j        d                              d                    5 }|                     t
          j        |                                                                           d d d            n# 1 swxY w Y   |                     t
          j        t          j        d                              d                                                                                     d S )Nz/stemmers/porter_test/porter_original_output.txtr3   r4   )	r   r   r6   r7   rF   r   ORIGINAL_ALGORITHMr8   r9   r:   s     r   test_vocabulary_original_modez(PorterTest.test_vocabulary_original_modet   s    IGHHMM  N  
 
 	 ..0"''))2F2F2H2H  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	**,IGHHT7T##TVVZ\\	
 	
 	
 	
 	
s   ABBBc                 T    t                                          d          dk    sJ dS )zTest for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        oedoNr   r   )r   s    r   test_oed_bugzPorterTest.test_oed_bug   s-    
 ##E**c111111r   c                 8   t                      }|                    d          dk    sJ |                    d          dk    sJ |                    dd          dk    sJ |                    d          dk    sJ |                    dd          dk    sJ d	S )
zTest for improvement on https://github.com/nltk/nltk/issues/2507

        Ensures that stems are lowercased when `to_lowercase=True`
        OnonIiF)to_lowercaseGithubgithubNrT   )r   porters     r   test_lowercase_optionz PorterTest.test_lowercase_option   s    
 {{4  D(((({{33&&&&{{3U{33s::::{{8$$0000{{8%{88HDDDDDDr   N)
r,   r-   r.   r<   rF   rI   rM   rP   rU   r_   r/   r   r   r1   r1   B   s        * * *    $  
 
 
02 2 2
E 
E 
E 
E 
Er   r1   )unittest
contextlibr   nltkr   nltk.stem.porterr   nltk.stem.snowballr   TestCaser   r1   r/   r   r   <module>rf      s                 * * * * * * . . . . . .6* 6* 6* 6* 6*8$ 6* 6* 6*r[E [E [E [E [E" [E [E [E [E [Er   