
    Ng&                         d dl Z d dlmZ d dlmZ d dlmZ 	 d dlZn# e$ r Y nw xY wd dl	m
Z
  G d de
          Zd Zd	 Z G d
 d          Z G d d          ZdS )    N)abstractmethod)sqrt)stdout)ClusterIc                   l    e Zd ZdZddZddZed             Zd Zed             Z	d	 Z
d
 Zd Zd ZdS )VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc                 0    d| _         || _        || _        dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)self	normalisesvd_dimensionss      M/var/www/html/ai-engine/env/lib/python3.11/site-packages/nltk/cluster/util.py__init__zVectorSpaceClusterer.__init__   s!     !*-    c                     t          |          dk    sJ  j        r"t          t           j        |                    } j        r j        t          |d                   k     rt          j                            t          j	        t          j
        |                              \  }}}|d  j                 t          j         j        t          j                  z  }|d d d  j        f         }|d  j        d d f         }	t          j	        t          j        ||	                    }t          j	        |           _                             ||           |r fd|D             S d S )Nr   c                 :    g | ]}                     |          S  )classify).0vectorr   s     r   
<listcomp>z0VectorSpaceClusterer.cluster.<locals>.<listcomp>?   s%    @@@fDMM&))@@@r   )lenr   listmap
_normaliser   numpylinalgsvd	transposearrayidentityfloat64dotr
   cluster_vectorspace)
r   vectorsassign_clusterstraceudvtSTDts
   `         r   clusterzVectorSpaceClusterer.cluster(   sr   7||a ! 	:3t8899G  	*D$83wqz??$J$J))%/%+g:N:N*O*OPPJQ2(D(()EN$em- - A !!!+t+++,A*d**AAA-.Boei2&6&677Gq))DH 	  %000  	A@@@@@@@@	A 	Ar   c                     dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r   r'   r)   s      r   r&   z(VectorSpaceClusterer.cluster_vectorspaceA         r   c                     | j         r|                     |          }| j        t          j        | j        |          }|                     |          }|                     |          S N)r   r   r
   r   r%   classify_vectorspacecluster_name)r   r   r0   s      r   r   zVectorSpaceClusterer.classifyG   s`    ! 	-__V,,F8Ytx00F++F33  )))r   c                     dS )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   r   r   s     r   r5   z)VectorSpaceClusterer.classify_vectorspaceO   r2   r   c                     | j         r|                     |          }| j        t          j        | j        |          }|                     ||          S r4   )r   r   r
   r   r%   likelihood_vectorspace)r   r   labels      r   
likelihoodzVectorSpaceClusterer.likelihoodU   sO    ! 	-__V,,F8Ytx00F**65999r   c                 @    |                      |          }||k    rdndS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r5   )r   r   r0   	predicteds       r   r:   z+VectorSpaceClusterer.likelihood_vectorspace\   s+     --f55	**ss3r   c                     | j         r|                     |          }| j        t          j        | j        |          }|S )zU
        Returns the vector after normalisation and dimensionality reduction
        )r   r   r
   r   r%   r8   s     r   r   zVectorSpaceClusterer.vectorc   s@     ! 	-__V,,F8Ytx00Fr   c                 L    |t          t          j        ||                    z  S )z7
        Normalises the vector to unit length.
        r   r   r%   r8   s     r   r   zVectorSpaceClusterer._normalisem   s#     UYvv667777r   )FN)FF)__name__
__module____qualname____doc__r   r0   r   r&   r   r5   r<   r:   r   r   r   r   r   r   r      s         
. 
. 
. 
.A A A A2   ^
* * *   ^
: : :4 4 4  8 8 8 8 8r   r   c                 P    | |z
  }t          t          j        ||                    S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    rA   )r*   vdiffs      r   euclidean_distancerI   t   s'    
 q5D	$%%&&&r   c           	          dt          j        | |          t          t          j        | |                     t          t          j        ||                    z  z  z
  S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r%   r   )r*   rG   s     r   cosine_distancerL   }   sG    
 	!Q4	!Q#8#84	!Q;P;P#PQRRr   c                   ,    e Zd ZdZd ZddZd Zd ZdS )	_DendrogramNodezTree node of a dendrogram.c                 "    || _         || _        d S r4   )_value	_children)r   valuechildrens      r   r   z_DendrogramNode.__init__   s    !r   Tc                     | j         r6g }| j         D ]*}|                    |                    |                     +|S |r| j        gS | gS r4   )rQ   extendleavesrP   )r   valuesrV   childs       r   rV   z_DendrogramNode.leaves   sa    > 	F 4 4ell6223333M 	K= 6Mr   c                    | j         | fg}t          |          |k     r|                                \  }}|j        s|                    ||f           nl|j        D ]=}|j        r|                    |j         |f           &|                    d|f           >|                                 t          |          |k     g }|D ],\  }}|                    |                                           -|S Nr   )rP   r   poprQ   pushappendsortrV   )r   nqueueprioritynoderX   groupss          r   rc   z_DendrogramNode.groups   s    +t$%%jj1nn"YY[[NHd> 

Hd+,,, - -? -LL%,!67777LL!U,,,,JJLLL %jj1nn # 	) 	)NHdMM$++--((((r   c                 >    t          | j        |j                  dk     S rZ   )rL   rP   )r   
comparators     r   __lt__z_DendrogramNode.__lt__   s    t{J,=>>BBr   N)T)rB   rC   rD   rE   r   rV   rc   rf   r   r   r   rN   rN      s`        $$" " "	 	 	 	  *C C C C Cr   rN   c                   8    e Zd ZdZg fdZd Zd Zg fdZd ZdS )
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c                 r    d |D             | _         t          j        | j                   | _        d| _        dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c                 ,    g | ]}t          |          S r   )rN   )r   items     r   r   z'Dendrogram.__init__.<locals>.<listcomp>   s     ???t,,???r   rK   N)_itemscopy_original_items_merge)r   itemss     r   r   zDendrogram.__init__   s9    
 @????#y55r   c                      t          |          dk    sJ t           j        g fd|D             R  } xj        dz  c_        | j        |d         <   |dd         D ]
} j        |= dS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c              3   2   K   | ]}j         |         V  d S r4   )rl   )r   ir   s     r   	<genexpr>z#Dendrogram.merge.<locals>.<genexpr>   s)      -N-Ndk!n-N-N-N-N-N-Nr   rK   r   N)r   rN   ro   rl   )r   indicesrb   rt   s   `   r   mergezDendrogram.merge   s     7||q    t{O-N-N-N-Ng-N-N-NOOOq"&GAJ 	 	AA	 	r   c                     t          | j                  dk    rt          | j        g| j        R  }n| j        d         }|                    |          S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        rK   r   )r   rl   rN   ro   rc   )r   r_   roots      r   rc   zDendrogram.groups   sO     t{a"4;====DD;q>D{{1~~r   c                 B   d\  }}}t          | j                  dk    rt          | j        g| j        R  }n| j        d         }| j        }|r|}nd |D             }t          t          t           |                    dz   dz  t          z
  dz
            dfd	d }|j        |fg}	fd	|D             }
|	r|		                                \  }}t          t          d
 |j                            }t          t          |j        |                    }|rt          |          }t          |          }t          t          |                    D ]}||         |v r_||k    r | |d|                     n3||k    r | ||d                     n | |||                      |          |
|<   k||cxk    r|k    rn n | |||                      ||
|                     |d           |j        D ]%}|j        r|	                    |j        |f           &|	                                 |
D ]} ||            |d           |	 |d                    fd|D                                   |d           dS )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )+-|rK   r   c                 "    g | ]}d |j         z  S )z%s)rP   )r   leafs     r   r   z#Dendrogram.show.<locals>.<listcomp>   s    >>>tt{*>>>r   rr    c                      |z   |  |z   S r4   r   )centreleftrightlhalfrhalfs      r   formatzDendrogram.show.<locals>.format   s!    dl;F;EEM;;;r   c                 .    t          j        |            d S r4   )r   write)strs    r   displayz Dendrogram.show.<locals>.display   s    Lr   c                 &    g | ]} d           S )r   r   )r   r   r   s     r   r   z#Dendrogram.show.<locals>.<listcomp>  s!    444TVVC[[444r   c                 8    |                      d          d         S )NFr   )rV   )cs    r   <lambda>z!Dendrogram.show.<locals>.<lambda>  s    %1C r   
 c              3   B   K   | ]}|                               V  d S r4   )center)r   rk   widths     r   ru   z"Dendrogram.show.<locals>.<genexpr>#  s/      @@tE**@@@@@@r   N)r   r   )r   rl   rN   ro   rn   maxr   intrP   r[   r   rQ   indexminranger]   r^   join)r   leaf_labelsJOINHLINKVLINKry   rV   last_rowr   r`   	verticalsra   rb   child_left_leafrv   min_idxmax_idxrt   rX   verticalr   r   r   r   s                       @@@@r   showzDendrogram.show   sX    +eU t{a"4;====DD;q>D% 	?"HH>>v>>>H CX&&''!+
EEMA%&&	< 	< 	< 	< 	< 	< 	<	 	 	 +t$%4444V444	 	"YY[[NHd"3'C'CT^#T#TUUO3v|_==>>G 'g,,g,,3v;;'' * *!9//G||tS% 8 89999gtUC 8 89999tUE : :;;;#)6%==IaLL,,,,W,,,,,GFF5%778888GIaL))))GDMMM 8 8? 8LL%,!6777JJLLL% " "!!!!GDMMM9  	> 	@@@@x@@@@@AAAr   c                     t          | j                  dk    rt          | j        g| j        R  }n| j        d         }|                    d          }dt          |          z  S )NrK   r   Fz<Dendrogram with %d leaves>)r   rl   rN   ro   rV   )r   ry   rV   s      r   __repr__zDendrogram.__repr__&  s_    t{a"4;====DD;q>DU##,s6{{::r   N)	rB   rC   rD   rE   r   rw   rc   r   r   r   r   r   rh   rh      s                  
 
 
  " G G G GR; ; ; ; ;r   rh   )rm   abcr   mathr   sysr   r   ImportErrornltk.cluster.apir   r   rI   rL   rN   rh   r   r   r   <module>r      sK                     	LLLL 	 	 	D	 & % % % % %\8 \8 \8 \8 \88 \8 \8 \8~' ' 'S S S(C (C (C (C (C (C (C (CV|; |; |; |; |; |; |; |; |; |;s    %%