
    NgQ                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ ddZ ddZ! G d de          Z"dS )    )annotationsN)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)AddableMixinDocstore)InMemoryDocstore)DistanceStrategyx
np.ndarrayreturnc                x    | t          j        t           j                            | dd          dd          z  } | S )z!Normalize vectors to unit length.T)axiskeepdimsg-q=N)npcliplinalgnorm)r   s    b/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/scann.py	normalizer!      s3    T::E4	H	HHAH    r   c                      t          d          S )z=
    Import `scann` if available, otherwise raise error.
    scannr    r"   r    dependable_scann_importr&      s        r"   c                  B   e Zd ZdZddej        dfdFdZ	 	 dGdHd Z	 	 dGdId!Z	 	 dGdJd$Z	dKdLd&Z
	 	 	 dMdNd0Z	 	 	 dMdOd3Z	 	 	 dMdPd5Z	 	 	 dMdQd6Ze	 	 	 dRdSd8            Ze	 	 dGdTd9            Ze	 	 dGdUd;            ZdVdWd?Ze	 dVdd@dXdB            ZdYdDZ	 	 	 dMdOdEZdS )ZScaNNa  `ScaNN` vector store.

    To use, you should have the ``scann`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceEmbeddings
            from langchain_community.vectorstores import ScaNN

            model_name = "sentence-transformers/all-mpnet-base-v2"
            db = ScaNN.from_texts(
                ['foo', 'bar', 'barz', 'qux'],
                HuggingFaceEmbeddings(model_name=model_name))
            db.similarity_search('foo?', k=1)
    NF	embeddingr   indexr   docstorer   index_to_docstore_idDict[int, str]relevance_score_fn"Optional[Callable[[float], float]]normalize_L2booldistance_strategyr   scann_configOptional[str]c	                v    || _         || _        || _        || _        || _        || _        || _        || _        dS )z%Initialize with necessary components.N)r)   r*   r+   r,   r2   override_relevance_score_fn_normalize_L2_scann_config)	selfr)   r*   r+   r,   r.   r0   r2   r3   s	            r    __init__zScaNN.__init__3   sG     #
 $8!!2+=())r"   textsIterable[str]
embeddingsIterable[List[float]]	metadatasOptional[List[dict]]idsOptional[List[str]]kwargsr   	List[str]c                    t          | j        t                    st          d| j         d          t	          d          )NSIf trying to add texts, the underlying docstore should support adding items, which 	 does notz(Updates are not available in ScaNN, yet.)
isinstancer+   r   
ValueErrorNotImplementedError)r9   r;   r=   r?   rA   rC   s         r    __addzScaNN.__addH   sX     $-66 	@'+}@ @ @   ""LMMMr"   c                t    | j                             t          |                    } | j        ||f||d|S )al  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        r?   rA   )r)   embed_documentslist_ScaNN__add)r9   r;   r?   rA   rC   r=   s         r    	add_textszScaNN.add_textsW   sB    $ ^33DKK@@
tz%TycTTVTTTr"   text_embeddings!Iterable[Tuple[str, List[float]]]c                    t          | j        t                    st          d| j         d          t	          | \  }} | j        ||f||d|S )a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            text_embeddings: Iterable pairs of string and embedding to
                add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        rF   rG   rM   )rH   r+   r   rI   ziprP   )r9   rR   r?   rA   rC   r;   r=   s          r    add_embeddingszScaNN.add_embeddingsl   s{    $ $-66 	@'+}@ @ @  
  1ztz%TycTTVTTTr"   Optional[bool]c                     t          d          )a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        z*Deletions are not available in ScaNN, yet.)rJ   )r9   rA   rC   s      r    deletezScaNN.delete   s     ""NOOOr"         List[float]kintfilterOptional[Dict[str, Any]]fetch_kList[Tuple[Document, float]]c                   t          j        |gt           j                  }| j        rt	          |          }| j                            |||n|          \  }}g }	t          |d                   D ]\  }
}|dk    r| j        |         }| j	        
                    |          t          t                    st          d| d           |od |                                D             }t          fd|                                D                       r#|	                    |d         |
         f           |	                    |d         |
         f           |                    d	          F| j        t&          j        t&          j        fv rt,          j        nt,          j        fd
|	D             }	|	d|         S )a  Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        dtypeNr   r   zCould not find document for id z, got c                J    i | ] \  }}|t          |t                    s|gn|!S r%   )rH   rO   ).0keyvalues      r    
<dictcomp>z@ScaNN.similarity_search_with_score_by_vector.<locals>.<dictcomp>   sC       "U 
5$(?(?J%U  r"   c              3  V   K   | ]#\  }}j                             |          |v V  $d S N)metadataget)rg   rh   ri   docs      r    	<genexpr>z?ScaNN.similarity_search_with_score_by_vector.<locals>.<genexpr>   s<      WW*#us|'',,5WWWWWWr"   score_thresholdc                6    g | ]\  }} |          ||fS r%   r%   )rg   ro   
similaritycmprq   s      r    
<listcomp>z@ScaNN.similarity_search_with_score_by_vector.<locals>.<listcomp>   sD       #C3z?33j!  r"   )r   arrayfloat32r7   r!   r*   search_batched	enumerater,   r+   searchrH   r   rI   itemsallappendrn   r2   r   MAX_INNER_PRODUCTJACCARDoperatorgele)r9   r)   r]   r_   ra   rC   vectorindicesscoresdocsji_idrt   ro   rq   s                @@@r    &similarity_search_with_score_by_vectorz,ScaNN.similarity_search_with_score_by_vector   s   0 9+RZ888 	'v&&F*33AAW
 
 gaj)) 	1 	1DAqBww+A.C-&&s++Cc8,, U !S3!S!Sc!S!STTT! &,llnn   WWWWWWWWW 5KKfQil 3444S&)A,/0000 **%677& )$68H8PQR R  [	     '+  D
 BQBxr"   querystrc                ^    | j                             |          } | j        ||f||d|}|S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        r_   ra   )r)   embed_queryr   )r9   r   r]   r_   ra   rC   r)   r   s           r    similarity_search_with_scorez"ScaNN.similarity_search_with_score   sV    * N..u55	:t:
 	
 

 
 
 r"   List[Document]c                >     | j         ||f||d|}d |D             S )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        r   c                    g | ]\  }}|S r%   r%   rg   ro   _s      r    ru   z5ScaNN.similarity_search_by_vector.<locals>.<listcomp>      222Q222r"   )r   )r9   r)   r]   r_   ra   rC   docs_and_scoress          r    similarity_search_by_vectorz!ScaNN.similarity_search_by_vector   sQ    ( F$E
 	
 

 
 
 32/2222r"   c                >     | j         ||f||d|}d |D             S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        r   c                    g | ]\  }}|S r%   r%   r   s      r    ru   z+ScaNN.similarity_search.<locals>.<listcomp>)  r   r"   )r   )r9   r   r]   r_   ra   rC   r   s          r    similarity_searchzScaNN.similarity_search  sJ    ( <$;1
#W
 
8>
 
 32/2222r"   List[List[float]]c                `   t          d          }|                    dt          j                  }	|                    dd           }
t	          j        |t          j                  }|rt          |          }|
|j        	                    ||
          }n|	t          j
        k    rA|j                            |dd                                                                          }n@|j                            |dd                                                                          }g }|d |D             }t          |          D ]5\  }}|r||         ni }|                    t!          ||	                     6t#          t          |                    }t%          |          t%          |          k    r/t'          t%          |           d
t%          |           d          t)          t#          t+          |                                |                              } | ||||fd|i|S )Nr$   r2   r3   rd      dot_product
squared_l2c                N    g | ]"}t          t          j                              #S r%   )r   uuiduuid4)rg   r   s     r    ru   z ScaNN.__from.<locals>.<listcomp>Q  s&    4443tz||$$444r"   )page_contentrm   z ids provided for z, documents. Each document should have an id.r0   )r   rn   r   EUCLIDEAN_DISTANCEr   rv   rw   r!   scann_ops_pybindcreate_searcherr~   builderscore_brute_forcebuildry   r}   r   dictlen	Exceptionr   rU   values)clsr;   r=   r)   r?   rA   r0   rC   r$   r2   r3   r   r*   	documentsr   textrm   index_to_idr+   s                      r    __fromzScaNN.__from+  sG    W%%"JJ!1!D
 
 zz.$77*BJ777 	'v&&F#*::6<PPEE $4$FFF*2261mLL&&((UWW  *2261lKK&&((UWW 
 	;44e444C '' 	M 	MGAt'08y||bHX4(KKKLLLL9S>>**{s9~~--{## 4 4s9~~ 4 4 4  
 $D[-?-?-A-A9)M)M$N$NOOs	
 

 &
 
 
 	
r"   c                R    |                     |          } | j        |||f||d|S )aN  Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                scann = ScaNN.from_texts(texts, embeddings)
        rM   )rN   _ScaNN__from)r   r;   r)   r?   rA   rC   r=   s          r    
from_textszScaNN.from_textsg  sR    4 ..u55
sz
  
 
 
 
 	
r"   List[Tuple[str, List[float]]]c                X    d |D             }d |D             } | j         |||f||d|S )a  Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                scann = ScaNN.from_embeddings(text_embedding_pairs, embeddings)
        c                    g | ]
}|d          S )r   r%   rg   ts     r    ru   z)ScaNN.from_embeddings.<locals>.<listcomp>  s    ///!1///r"   c                    g | ]
}|d          S )r   r%   r   s     r    ru   z)ScaNN.from_embeddings.<locals>.<listcomp>  s    444qad444r"   rM   )r   )r   rR   r)   r?   rA   rC   r;   r=   s           r    from_embeddingszScaNN.from_embeddings  se    8 0////44O444
sz
  
 
 
 
 	
r"   folder_path
index_nameNonec                   t          |          }|d                    |          z  }|                    dd           | j                            t          |                     t          |d                    |          z  d          5 }t          j        | j	        | j
        f|           ddd           dS # 1 swxY w Y   dS )zSave ScaNN index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
        {index_name}.scannr   Texist_okparents{index_name}.pklwbN)r   formatmkdirr*   	serializer   openpickledumpr+   r,   )r9   r   r   path
scann_pathfs         r    
save_localzScaNN.save_local  s    K  077:7NNN
$555 	
S__--- $+22j2III4PP 	GTUK(ABAFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	Gs   "B==CC)allow_dangerous_deserializationr   c                  |st          d          t          |          }|d                    |          z  }|                    dd           t	          d          }|j                            t          |                    }	t          |d                    |          z  d          5 }
t          j
        |
          \  }}d	d	d	           n# 1 swxY w Y    | ||	||fi |S )
a  Load ScaNN index, docstore, and index_to_docstore_id from disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embedding: Embeddings to use when generating queries
            index_name: for saving with a specific index file name
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   Tr   r$   r   rbN)rI   r   r   r   r   r   load_searcherr   r   r   load)r   r   r)   r   r   rC   r   r   r$   r*   r   r+   r,   s                r    
load_localzScaNN.load_local  sL   . / 		"   K  077:7NNN
$555W%%&44S__EE $+22j2III4PP 	TU  $	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s9eX/CNNvNNNs   .CCCCallable[[float], float]c                    | j         | j         S | j        t          j        k    r| j        S | j        t          j        k    r| j        S t          d          )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        NzJUnknown distance strategy, must be cosine, max_inner_product, or euclidean)r6   r2   r   r~   %_max_inner_product_relevance_score_fnr   _euclidean_relevance_score_fnrI   )r9   s    r    _select_relevance_score_fnz ScaNN._select_relevance_score_fn  se     +733 !%5%GGG==#'7'JJJ55   r"   c                   	 |                     dd          	|                                 t          d           | j        |f|||d|}fd|D             }		fd|D             }|S )z?Return docs and their similarity scores on a scale from 0 to 1.rq   NzLnormalize_score_fn must be provided to ScaNN constructor to normalize scores)r]   r_   ra   c                0    g | ]\  }}| |          fS r%   r%   )rg   ro   scorer.   s      r    ru   zBScaNN._similarity_search_with_relevance_scores.<locals>.<listcomp>,  s;     
 
 
1;eS$$U++,
 
 
r"   c                *    g | ]\  }}|k    ||fS r%   r%   )rg   ro   rs   rq   s      r    ru   zBScaNN._similarity_search_with_relevance_scores.<locals>.<listcomp>0  s7     # # ##C00 j!000r"   )popr   rI   r   )
r9   r   r]   r_   ra   rC   r   docs_and_rel_scoresr.   rq   s
           @@r    (_similarity_search_with_relevance_scoresz.ScaNN._similarity_search_with_relevance_scores  s     !**%6==!<<>>%9   <$;
	
 

 
 

 
 
 
?N
 
 
 &# # # #':# # #
 #"r"   )r)   r   r*   r   r+   r   r,   r-   r.   r/   r0   r1   r2   r   r3   r4   )NN)r;   r<   r=   r>   r?   r@   rA   rB   rC   r   r   rD   )
r;   r<   r?   r@   rA   rB   rC   r   r   rD   )
rR   rS   r?   r@   rA   rB   rC   r   r   rD   rl   )rA   rB   rC   r   r   rW   )rZ   Nr[   )r)   r\   r]   r^   r_   r`   ra   r^   rC   r   r   rb   )r   r   r]   r^   r_   r`   ra   r^   rC   r   r   rb   )r)   r\   r]   r^   r_   r`   ra   r^   rC   r   r   r   )r   r   r]   r^   r_   r`   ra   r^   rC   r   r   r   )NNF)r;   rD   r=   r   r)   r   r?   r@   rA   rB   r0   r1   rC   r   r   r(   )r;   rD   r)   r   r?   r@   rA   rB   rC   r   r   r(   )rR   r   r)   r   r?   r@   rA   rB   rC   r   r   r(   )r*   )r   r   r   r   r   r   )r   r   r)   r   r   r   r   r1   rC   r   r   r(   )r   r   )__name__
__module____qualname____doc__r   r   r:   rP   rQ   rV   rY   r   r   r   r   classmethodr   r   r   r   r   r   r   r%   r"   r    r(   r(   !   s        . BF".>.Q&** * * * *2 +/#'N N N N N$ +/#'	U U U U U0 +/#'	U U U U U8P P P P P" +/> > > > >F +/    D +/3 3 3 3 3@ +/3 3 3 3 32  +/#'"9
 9
 9
 9
 [9
v 
 +/#'!
 !
 !
 !
 [!
F 
 +/#'$
 $
 $
 $
 [$
LG G G G G$ 
 "	3O 163O 3O 3O 3O 3O [3Oj   8 +/"# "# "# "# "# "# "#r"   r(   )r   r   r   r   )r   r   )#
__future__r   r   r   r   pathlibr   typingr   r   r   r   r	   r
   r   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   !langchain_community.docstore.baser   r   &langchain_community.docstore.in_memoryr   &langchain_community.vectorstores.utilsr   r!   r&   r(   r%   r"   r    <module>r      sp   " " " " " "          G G G G G G G G G G G G G G G G G G     - - - - - - 0 0 0 0 0 0 - - - - - - 3 3 3 3 3 3 D D D D D D D D C C C C C C C C C C C C   ! ! ! !T# T# T# T# T#K T# T# T# T# T#r"   