
    NgS0                        U d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZdZ G d de          Z G d de          Z G d de          Z  G d de          Z!ee e!dZ"e	e#ee         f         e$d<    G d de%          Z& G d de          Z'dS )zWrapper around scikit-learn NearestNeighbors implementation.

The vector store can be persisted in json, bson or parquet format.
    N)ABCabstractmethod)AnyDictIterableListLiteralOptionalTupleType)uuid4)Document)
Embeddings)guard_import)VectorStore)maximal_marginal_relevance      c                       e Zd ZdZdeddfdZeedefd                        Zede	ddfd            Z
ede	fd	            ZdS )
BaseSerializerz Base class for serializing data.persist_pathreturnNc                     || _         d S Nr   )selfr   s     d/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/sklearn.py__init__zBaseSerializer.__init__   s    (    c                     dS )z>The file extension suggested by this serializer (without dot).N clss    r   	extensionzBaseSerializer.extension         r   datac                     dS )z"Saves the data to the persist_pathNr!   r   r&   s     r   savezBaseSerializer.save#   r%   r   c                     dS )z$Loads the data from the persist_pathNr!   r   s    r   loadzBaseSerializer.load'   r%   r   )__name__
__module____qualname____doc__strr   classmethodr   r$   r   r)   r,   r!   r   r   r   r      s        **)S )T ) ) ) ) M# M M M ^ [M 1 1 1 1 1 ^1 3c 3 3 3 ^3 3 3r   r   c                   J    e Zd ZdZedefd            ZdeddfdZdefdZ	dS )JsonSerializerzKSerialize data in JSON using the json package from python standard library.r   c                     dS )Njsonr!   r"   s    r   r$   zJsonSerializer.extension/       vr   r&   Nc                     t          | j        d          5 }t          j        ||           d d d            d S # 1 swxY w Y   d S )Nw)openr   r6   dumpr   r&   fps      r   r)   zJsonSerializer.save3   s    $#S)) 	 RIdB	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 s   9= =c                     t          | j        d          5 }t          j        |          cd d d            S # 1 swxY w Y   d S )Nr)r:   r   r6   r,   r   r=   s     r   r,   zJsonSerializer.load7   s    $#S)) 	!R9R==	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!s   7;;)
r-   r.   r/   r0   r2   r1   r$   r   r)   r,   r!   r   r   r4   r4   ,   s|        UU#    [           !c ! ! ! ! ! !r   r4   c                   d     e Zd ZdZdeddf fdZedefd            ZdeddfdZ	defd	Z
 xZS )
BsonSerializerz>Serialize data in Binary JSON using the `bson` python package.r   r   Nc                 r    t                                          |           t          d          | _        d S Nbson)superr   r   rE   r   r   	__class__s     r   r   zBsonSerializer.__init__?   s.    &&& ((			r   c                     dS rD   r!   r"   s    r   r$   zBsonSerializer.extensionC   r7   r   r&   c                     t          | j        d          5 }|                    | j                            |                     d d d            d S # 1 swxY w Y   d S )Nwb)r:   r   writerE   dumpsr<   s      r   r)   zBsonSerializer.saveG   s    $#T** 	,bHHTY__T**+++	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,s   .AAAc                     t          | j        d          5 }| j                            |                                          cd d d            S # 1 swxY w Y   d S )Nrb)r:   r   rE   loadsreadr@   s     r   r,   zBsonSerializer.loadK   s    $#T** 	.b9??27799--	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	.s   ,AAAr-   r.   r/   r0   r1   r   r2   r$   r   r)   r,   __classcell__rH   s   @r   rB   rB   <   s        HH)S )T ) ) ) ) ) ) #    [, , , , , ,.c . . . . . . . .r   rB   c                   d     e Zd ZdZdeddf fdZedefd            ZdeddfdZ	defd	Z
 xZS )
ParquetSerializerzFSerialize data in `Apache Parquet` format using the `pyarrow` package.r   r   Nc                     t                                          |           t          d          | _        t          d          | _        t          d          | _        d S )Npandaspyarrowzpyarrow.parquet)rF   r   r   pdpapqrG   s     r   r   zParquetSerializer.__init__S   sM    &&&x((y))011r   c                     dS )Nparquetr!   r"   s    r   r$   zParquetSerializer.extensionY   s    yr   r&   c                 ,   | j                             |          }| j        j                            |          }t
          j                            | j                  rt          | j                  dz   }t          j
        | j        |           	 | j                            || j                   t          j        |           d S # t          $ r!}t          j
        || j                   |d }~ww xY w| j                            || j                   d S )Nz-backup)rZ   	DataFramer[   Tablefrom_pandasospathexistsr   r1   renamer\   write_tableremove	Exception)r   r&   dftablebackup_pathexcs         r   r)   zParquetSerializer.save]   s   Wt$$))"--7>>$+,, 	:d/009<KId'555'##E4+<===
 	+&&&&&	    	+t'8999	 Gt'899999s    C 
C1C,,C1c                     | j                             | j                  }|                                }d |                                D             S )Nc                 >    i | ]\  }}||                                 S r!   )tolist).0colseriess      r   
<dictcomp>z*ParquetSerializer.load.<locals>.<dictcomp>p   s&    CCCfV]]__CCCr   )r\   
read_tabler   	to_pandasitems)r   rk   rj   s      r   r,   zParquetSerializer.loadm   sE    ""4#455__CC

CCCCr   rR   rT   s   @r   rV   rV   P   s        PP2S 2T 2 2 2 2 2 2 #    [: : : : : : Dc D D D D D D D Dr   rV   r6   rE   r^   SERIALIZER_MAPc                       e Zd ZdZdS )SKLearnVectorStoreExceptionz'Exception raised by SKLearnVectorStore.N)r-   r.   r/   r0   r!   r   r   r{   r{   z   s        11Dr   r{   c                      e Zd ZdZdddddedee         ded	         d
ededdfdZ	e
defd            Zd$dZd$dZ	 	 d%dee         deee                  deee                  dedee         f
dZd$dZeddee         dededeeeef                  fdZeddedededeeeef                  fdZefdedededee         fdZefdedededeeeef                  fdZeedfdee         deded ededee         fd!Zeedfdededed ededee         fd"Ze	 	 	 d&dee         dedeee                  deee                  dee         dedd fd#            Z dS )'SKLearnVectorStorezYSimple in-memory vector store based on the `scikit-learn` library
    `NearestNeighbors`.Nr6   cosine)r   
serializermetric	embeddingr   r   rx   r   kwargsr   c                   t          d          }t          dd          }|| _         |j        dd|i|| _        d| _        || _        || _        d | _        | j        #t          |         } || j                  | _        g | _	        g | _
        g | _        g | _        |                    g           | _        | j        :t          j                            | j                  r|                                  d S d S d S )	Nnumpyzsklearn.neighborszscikit-learn)pip_namer   Fr   r!   )r   _npNearestNeighbors
_neighbors_neighbors_fitted_embedding_function_persist_path_serializerry   _embeddings_texts
_metadatas_idsasarray_embeddings_nprc   rd   isfile_load)	r   r   r   r   r   r   npsklearn_neighborsserializer_clss	            r   r   zSKLearnVectorStore.__init__   s
    '""()<~VVV <+<UUFUfUU!&#, )59)+J7N-~4;MNNND /1!#&(!	 $&::b>>)bgnnT=O.P.P)JJLLLLL *)))r   c                     | j         S r   )r   r+   s    r   
embeddingszSKLearnVectorStore.embeddings   s    ''r   c                     | j         t          d          | j        | j        | j        | j        d}| j                             |           d S )NzFYou must specify a persist_path on creation to persist the collection.)idstexts	metadatasr   )r   r{   r   r   r   r   r)   r(   s     r   persistzSKLearnVectorStore.persist   sd    #-  
 9[*	
 
 	d#####r   c                     | j         t          d          | j                                         }|d         | _        |d         | _        |d         | _        |d         | _        |                                  d S )NzCYou must specify a persist_path on creation to load the collection.r   r   r   r   )r   r{   r,   r   r   r   r   _update_neighborsr(   s     r   r   zSKLearnVectorStore._load   s}    #-X   $$&&-7m{+K	     r   r   r   r   c                    t          |          }|pd |D             }| j                            |           | j                            | j                            |                     | j                            |pi gt          |          z             | j                            |           | 	                                 |S )Nc                 D    g | ]}t          t                                S r!   )r1   r   )rq   _s     r   
<listcomp>z0SKLearnVectorStore.add_texts.<locals>.<listcomp>   s"    444s577||444r   )
listr   extendr   r   embed_documentsr   lenr   r   )r   r   r   r   r   r   r   s          r   	add_textszSKLearnVectorStore.add_texts   s     e444V4446""" 8 H H P PQQQy@bTCKK-?AAA	   r   c                     t          | j                  dk    rt          d          | j                            | j                  | _        | j                            | j                   d| _        d S )Nr   (No data was added to SKLearnVectorStore.T)	r   r   r{   r   r   r   r   fitr   r+   s    r   r   z$SKLearnVectorStore._update_neighbors   sq    t  A%%-:   #h..t/?@@D/000!%r   )kquery_embeddingr   c                    | j         st          d          | j                            |g|          \  }}t	          t          |d         |d                             S )zgSearch k embeddings similar to the query embedding. Returns a list of
        (index, distance) tuples.r   )n_neighborsr   )r   r{   r   
kneighborsr   zip)r   r   r   r   neigh_dists
neigh_idxss         r   #_similarity_index_search_with_scorez6SKLearnVectorStore._similarity_index_search_with_score   sq    
 % 	-:   #'/"<"<1 #= #
 #
Z C
1{1~66777r   queryc                t      j                             |          }  j        |fd|i|} fd|D             S )Nr   c                     g | ]=\  }}t          j        |         d j        |         ij        |                   |f>S id)page_contentmetadatar   r   r   r   )rq   idxdistr   s      r   r   zCSKLearnVectorStore.similarity_search_with_score.<locals>.<listcomp>   sl     	
 	
 	
 T !%S!1"DIcNKdoc6JK   	
 	
 	
r   )r   embed_queryr   )r   r   r   r   r   indices_distss   `     r   similarity_search_with_scorez/SKLearnVectorStore.similarity_search_with_score   st     2>>uEE@@
 
 
$*
 
	
 	
 	
 	
 +	
 	
 	
 		
r   c                 :     | j         |fd|i|}d |D             S )Nr   c                     g | ]\  }}|S r!   r!   )rq   docr   s      r   r   z8SKLearnVectorStore.similarity_search.<locals>.<listcomp>  s    ...Q...r   )r   )r   r   r   r   docs_scoress        r   similarity_searchz$SKLearnVectorStore.similarity_search   s7     8d7MMMfMM..+....r   c                      | j         |fd|i|}t          | \  }}d |D             }t          t          t          |          |                    S )Nr   c                 <    g | ]}d t          j        |          z  S )   )mathexp)rq   r   s     r   r   zOSKLearnVectorStore._similarity_search_with_relevance_scores.<locals>.<listcomp>	  s%    777!dhtnn$777r   )r   r   r   )r   r   r   r   
docs_distsdocsdistsscoress           r   (_similarity_search_with_relevance_scoresz;SKLearnVectorStore._similarity_search_with_relevance_scores  sb     7T6uLLLVLL
:&e77777CT

F++,,,r   g      ?fetch_klambda_multc                        j         |fd|i|}t          | \  } j        f         }t           j                            | j        j                  |||          }	fd|	D             }
 fd|
D             S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )dtype)r   r   c                      g | ]
}|         S r!   r!   )rq   iindicess     r   r   zNSKLearnVectorStore.max_marginal_relevance_search_by_vector.<locals>.<listcomp>-  s    888awqz888r   c                 |    g | ]8}t          j        |         d j        |         ij        |                   9S r   r   )rq   r   r   s     r   r   zNSKLearnVectorStore.max_marginal_relevance_search_by_vector.<locals>.<listcomp>.  s^     
 
 

 	 ![-	#G$/#2FG  
 
 
r   )r   r   r   r   r   arrayfloat32)r   r   r   r   r   r   r   r   result_embeddingsmmr_selectedmmr_indicesr   s   `          @r   'max_marginal_relevance_search_by_vectorz:SKLearnVectorStore.max_marginal_relevance_search_by_vector  s    , A@
 
 
$*
 
 -(
 /91HNN9DH,<N==#	
 
 
 9888<888
 
 
 

 #
 
 
 	
r   c                     | j         t          d          | j                             |          }|                     ||||          }|S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        NzCFor MMR search, you must specify an embedding function on creation.)
lambda_mul)r   
ValueErrorr   r   )r   r   r   r   r   r   r   r   s           r   max_marginal_relevance_searchz0SKLearnVectorStore.max_marginal_relevance_search6  sd    , #+U   ,88??	;;q'k < 
 
 r   c                 T    t          |fd|i|}|                    |||           |S )Nr   )r   r   )r}   r   )r#   r   r   r   r   r   r   vss           r   
from_textszSKLearnVectorStore.from_textsW  s;      	OOOOO
UiS999	r   )r   N)NN)NNN)!r-   r.   r/   r0   r   r
   r1   r	   r   r   propertyr   r   r   r   r   dictr   r   	DEFAULT_Kfloatintr   r   r   r   r   r   DEFAULT_FETCH_Kr   r   r2   r   r!   r   r   r}   r}      s         '+9?! ! !! sm	!
 56! ! ! 
! ! ! !F (J ( ( ( X($ $ $ $
! 
! 
! 
! +/#'	 } DJ' d3i 	
  
c    & & & & 9B8 8 8#E{8258MP8	eCJ	 8 8 8 8 '0
 
 

 #
;>
	eHeO$	%
 
 
 
& $-/ // /8;/	h/ / / / $-- -- -8;-	eHeO$	%- - - - & (
 (
;(
 (
 	(

 (
 (
 
h(
 (
 (
 (
Z &    	
   
h   B 
 +/#'&* Cy  DJ'	
 d3i  sm  
   [  r   r}   )(r0   r6   r   rc   abcr   r   typingr   r   r   r   r	   r
   r   r   uuidr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   r   r   r   r4   rB   rV   ry   r1   __annotations__RuntimeErrorr{   r}   r!   r   r   <module>r      sK    
   				 # # # # # # # # L L L L L L L L L L L L L L L L L L L L       - - - - - - 0 0 0 0 0 0 - - - - - - 3 3 3 3 3 3 M M M M M M	3 3 3 3 3S 3 3 3(! ! ! ! !^ ! ! ! . . . . .^ . . .( D  D  D  D  D  D  D  DH  3 3S$~../   	 	 	 	 	, 	 	 	c c c c c c c c c cr   