
    NgF                     V    d dl mZ d dlZd dlZd dlmZ ddlmZ  G d de          Z	dS )    )defaultdictN)xrange   )NGramc                   p    e Zd ZdZdZ ej        d          Z ej        d          Zd
dZ	d Z
d Zd	 ZdS )LangProfile   i z
^[A-Za-z]$z.*[A-Za-z].*Nc                     t          t                    | _        || j                            |           |dgt          j        z  }|| _        || _        d S )Nr   )r   intfrequpdater   N_GRAMnamen_words)selfr   r   r   s       Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/langdetect/utils/lang_profile.py__init__zLangProfile.__init__   sR    $$	IT"""?cEL(G	    c                     | j         |dS t          |          }|dk     s|t          j        k    rdS | j        |dz
  xx         dz  cc<   | j        |xx         dz  cc<   dS )zAdd n-gram to profile.Nr   )r   lenr   r   r   r   )r   gramlengths      r   addzLangProfile.add   sv    9FTA::%,..FVaZ   A%   	$1r   c                 |   | j         dS t          | j        d         | j        z  | j                  }d}t          t          j        | j                            D ]X\  }}||k    r.| j        t          |          dz
  xx         |z  cc<   | j        |= 9| j
                            |          r||z  }Y|| j        d         dz  k     rst          t          j        | j                            D ]N\  }}| j                            |          r-| j        t          |          dz
  xx         |z  cc<   | j        |= MdS dS )zAEliminate below less frequency n-grams and noise Latin alphabets.Nr   r      )r   maxr   LESS_FREQ_RATIOMINIMUM_FREQlistsix	iteritemsr   r   ROMAN_CHAR_REmatchROMAN_SUBSTR_RE)r   	thresholdromankeycounts        r   omit_less_freqzLangProfile.omit_less_freq&   sX   9FQ4+??ARSS	s}TY7788 	 	JC	!!SXXaZ(((E1(((IcNN#))#..  4<?a'''"3=#;#;<< ' '
U'--c22 'LS!,,,5,,,	#	 ('' 'r   c                    |dS t          j        |          }t                      }|D ]_}|                    |           t          dt           j        dz             D ]*}|                     |                    |                     +`dS )zUpdate the language profile with (fragmented) text.
        Extract n-grams from text and add their frequency into the profile.
        Nr   )r   normalize_viadd_charr   r   r   get)r   textr   chns        r   r   zLangProfile.update;   s     <F!$''ww 	& 	&BMM"Au|A~.. & &!%%%%&	& 	&r   )NNN)__name__
__module____qualname__r   r   recompiler"   r$   r   r   r)   r    r   r   r   r   
   s        LOBJ}--M bj11O	 	 	 	  ' ' '*& & & & &r   r   )
collectionsr   r4   r    	six.movesr   ngramr   objectr   r6   r   r   <module>r;      s    # # # # # # 				 



            <& <& <& <& <&& <& <& <& <& <&r   