
    Ng;                        d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ  ej        e          Z G d de          ZdS )    )annotationsN)deepcopy)Enum)AnyIterableListOptionalTuple)Document)
Embeddings)run_in_executor)VectorStore)maximal_marginal_relevancec                  >   e Zd ZdZ	 dBdCdZedDd            Z	 	 	 dEdFdZe	 	 	 	 	 	 	 dGdHd            Z	 G d d e
          Zd!ej        dfdId(Zd!ej        dfdJd*Zd!ej        dfdKd,Zd!ej        dfdLd-Z	 	 	 dMdd0dNd4Z	 	 	 dOdPd9ZdQd<ZdRd>ZdSdTd@Z	 dSdTdAZdS )URockseta  `Rockset` vector store.

    To use, you should have the `rockset` python package installed. Note that to use
    this, the collection being used must already exist in your Rockset instance.
    You must also ensure you use a Rockset ingest transformation to apply
    `VECTOR_ENFORCE` on the column being used to store `embedding_key` in the
    collection.
    See: https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details

    Everything below assumes `commons` Rockset workspace.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Rockset
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            import rockset

            # Make sure you use the right host (region) for your Rockset instance
            # and APIKEY has both read-write access to your collection.

            rs = rockset.RocksetClient(host=rockset.Regions.use1a1, api_key="***")
            collection_name = "langchain_demo"
            embeddings = OpenAIEmbeddings()
            vectorstore = Rockset(rs, collection_name, embeddings,
                "description", "description_embedding")

    commonsclientr   
embeddingsr   collection_namestrtext_keyembedding_key	workspacec                Z   	 ddl m} n# t          $ r t          d          w xY wt          ||          st	          dt          |                     || _        || _        || _        || _	        || _
        || _        	 | j                            d           dS # t          $ r Y dS w xY w)aN  Initialize with Rockset client.
        Args:
            client: Rockset client object
            collection: Rockset collection to insert docs / query
            embeddings: Langchain Embeddings object to use to generate
                        embedding for given text.
            text_key: column in Rockset collection to use to store the text
            embedding_key: column in Rockset collection to use to store the embedding.
                           Note: We must apply `VECTOR_ENFORCE()` on this column via
                           Rockset ingest transformation.

        r   )RocksetClient]Could not import rockset client python package. Please install it with `pip install rockset`.z;client should be an instance of rockset.RocksetClient, got 	langchainN)rocksetr   ImportError
isinstance
ValueErrortype_client_collection_name_embeddings	_text_key_embedding_key
_workspaceset_applicationAttributeError)selfr   r   r   r   r   r   r   s           f/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/rocksetdb.py__init__zRockset.__init__1   s    *	------- 	 	 	@  	 &-00 	&F||& &  
  /%!+#	L((55555 	 	 	DD	s   	 # B 
B*)B*returnc                    | j         S N)r%   r+   s    r,   r   zRockset.embeddingsa   s        N    textsIterable[str]	metadatasOptional[List[dict]]idsOptional[List[str]]
batch_sizeintkwargs	List[str]c                   g }g }t          |          D ]\  }}	t          |          |k    r||                     |          z  }g }i }
|r(t          |          |k    rt          ||                   }
|rt          |          |k    r||         |
d<   |	|
| j        <   | j                            |	          |
| j        <   |                    |
           t          |          dk    r||                     |          z  }g }|S )a  Run more texts through the embeddings and add to the vectorstore

                Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            batch_size: Send documents in batches to rockset.

        Returns:
            List of ids from adding the texts into the vectorstore.

        _idr   )		enumeratelen_write_documents_to_rocksetr   r&   r%   embed_queryr'   append)r+   r4   r6   r8   r:   r<   batch
stored_idsitextdocs              r,   	add_textszRockset.add_textse   s   ( 
 '' 	 	GAt5zzZ''d>>uEEE
C -S^^a//y|,, $s3xx!|| VE
"&C'+'7'C'CD'I'IC#$LLu::>>$::5AAAJEr2    	embeddingc
                    |
J d            |s
J d            |s
J d            |s
J d             | |||||          }|                     ||||	           |S )znCreate Rockset wrapper with existing texts.
        This is intended as a quicker way to get started.
        NzRockset Client cannot be NonezCollection name cannot be emptyzText key name cannot be emptyzEmbedding key cannot be empty)rJ   )clsr4   rL   r6   r   r   r   r   r8   r:   r<   r   s               r,   
from_textszRockset.from_texts   s    & !!#B!!!AA AAAA888888======#fi(MRR%C<<<r2   c                  "    e Zd ZdZdZdZddZdS )	Rockset.DistanceFunction
COSINE_SIMEUCLIDEAN_DISTDOT_PRODUCTr.   r   c                     | j         dk    rdS dS )NrS   ASCDESC)valuer1   s    r,   order_byz!Rockset.DistanceFunction.order_by   s    z---u6r2   N)r.   r   )__name__
__module____qualname__rR   rS   rT   rY    r2   r,   DistanceFunctionrQ      s7        !
)#	 	 	 	 	 	r2   r^      querykdistance_func	where_strOptional[str]List[Tuple[Document, float]]c                T     | j         | j                            |          |||fi |S )a  Perform a similarity search with Rockset

        Args:
            query (str): Text to look up documents similar to.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): Metadata filters supplied as a
                SQL `where` condition string. Defaults to None.
                eg. "price<=70.0 AND brand='Nintendo'"

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection.

        Returns:
            List[Tuple[Document, float]]: List of documents with their relevance score
        )1similarity_search_by_vector_with_relevance_scoresr%   rC   r+   r`   ra   rb   rc   r<   s         r,   'similarity_search_with_relevance_scoresz/Rockset.similarity_search_with_relevance_scores   sH    2 FtE((//	
 

 
 
 	
r2   List[Document]c                T     | j         | j                            |          |||fi |S )zaSame as `similarity_search_with_relevance_scores` but
        doesn't return the scores.
        )similarity_search_by_vectorr%   rC   rh   s         r,   similarity_searchzRockset.similarity_search   sG     0t/((//	
 

 
 
 	
r2   List[float]c                <     | j         ||||fi |}d |D             S )zZAccepts a query_embedding (vector), and returns documents with
        similar embeddings.c                    g | ]\  }}|S r]   r]   ).0rI   _s      r,   
<listcomp>z7Rockset.similarity_search_by_vector.<locals>.<listcomp>   s    222Q222r2   )rg   )r+   rL   ra   rb   rc   r<   docs_and_scoress          r,   rl   z#Rockset.similarity_search_by_vector   sE     Q$Pq-
 
6<
 
 32/2222r2   c           	     j   d}d|v r|d         }|                      |||||          }	 | j        j                            d|i          }n4# t          $ r'}	t
                              d|	           g cY d}	~	S d}	~	ww xY wg }
|j        D ](}i }t          |t                    s*J d
                    t          |                                |                                D ]\  }}|| j        k    rHt          |t                    s0J d
                    | j        t          |                                |}X|d	k    rBt          |t                    s*J d

                    t          |                                |}|dvr|||<   |
                    t#          ||          |f           *|
S )z|Accepts a query_embedding (vector), and returns documents with
        similar embeddings along with their relevance scores.Texclude_embeddingsr`   )sqlz$Exception when querying Rockset: %s
Nz;document should be of type `dict[str,Any]`. But found: `{}`zIpage content stored in column `{}` must be of type `str`. But found: `{}`distzDComputed distance between vectors must of type `float`. But found {})r?   _event_time_meta)page_contentmetadata)_build_query_sqlr#   Queriesr`   	Exceptionloggererrorresultsr    dictformatr"   itemsr&   r   floatrD   r   )r+   rL   ra   rb   rc   r<   rv   q_strquery_responseefinalResultdocumentr|   vr{   scores                   r,   rg   z9Rockset.similarity_search_by_vector_with_relevance_scores   s(    "6))!'(<!=%%}a4F
 
	!\177We<L7MMNN 	 	 	LL@!DDDIIIIII	 57&. 	 	HH$   LSSX   
 !(( $ $1&&%a-- 6 6*fT^T!WW556 6 6 $%LL&[[%a// & &'fT!WWoo& & & EE=== #$HQK|hGGGO    s   "A 
A=A82A=8A=         ?)rc   fetch_klambda_multr   c                    j                             |          }  j        |f||dd| fdD             }t          t	          j        |          |||          }	|	D ]}
|
         j         j        = fd|	D             S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            where_str: where clause for the sql query
        Returns:
            List of Documents selected by maximal marginal relevance.
        F)ra   rc   rv   c                4    g | ]}|j         j                 S r]   )r|   r'   )rq   rI   r+   s     r,   rs   z9Rockset.max_marginal_relevance_search.<locals>.<listcomp>S  s#    PPPCcl4#67PPPr2   )r   ra   c                     g | ]
}|         S r]   r]   )rq   rG   initial_docss     r,   rs   z9Rockset.max_marginal_relevance_search.<locals>.<listcomp>a  s    :::AQ:::r2   )r%   rC   rl   r   nparrayr|   r'   )r+   r`   ra   r   r   rc   r<   query_embeddingr   selected_indicesrG   r   s   `          @r,   max_marginal_relevance_searchz%Rockset.max_marginal_relevance_search-  s    : *66u==7t7
$	
 

 
 
 QPPP<PPP
5H_%%#	
 
 
 " 	> 	>AQ()<==::::)9::::r2   Tr   rv   boolc                0   d                     t          t          |                    }|j         d| j         d| d}|rd| dnd}|rd| j         d	nd}d
| d| d| j         d| j         d| d|                                 dt          |           dS )zABuilds Rockset SQL query to query similar vectors to query_vector,(z, [z
]) as distzWHERE 
rK   z EXCEPT(z),zSELECT * z
FROM .zORDER BY dist z
LIMIT )joinmapr   rX   r'   r(   r$   rY   )	r+   r   rb   ra   rc   rv   q_embedding_strdistance_strselect_embeddings	            r,   r}   zRockset._build_query_sqle  s    ((3sO#<#<==)/    $2E         .7?*Y****R	2DM.t*....# 		 ( 
o -    %%''	 
 1vv   	r2   rE   
List[dict]c                |    | j         j                            | j        || j                  }d |j        D             S )N
collectiondatar   c                    g | ]	}|j         
S r]   )r?   )rq   
doc_statuss     r,   rs   z7Rockset._write_documents_to_rockset.<locals>.<listcomp>  s    BBB:
BBBr2   )r#   	Documentsadd_documentsr$   r(   r   )r+   rE   add_doc_ress      r,   rB   z#Rockset._write_documents_to_rockset~  sG    l,::,5DO ; 
 
 CB1ABBBBr2   Nonec                    	 ddl m n# t          $ r t          d          w xY w| j        j                            | j        fd|D             | j                   dS )z1Delete a list of docs from the Rockset collectionr   )DeleteDocumentsRequestDatar   c                (    g | ]} |           S ))idr]   )rq   rG   r   s     r,   rs   z(Rockset.delete_texts.<locals>.<listcomp>  s(    @@@q,,222@@@r2   r   N)rockset.modelsr   r   r#   r   delete_documentsr$   r(   )r+   r8   r   s     @r,   delete_textszRockset.delete_texts  s    	AAAAAAA 	 	 	@  	 	//,@@@@C@@@o 	0 	
 	
 	
 	
 	
s   
 $Optional[bool]c                    	 |g }|                      |           n3# t          $ r&}t                              d|           Y d }~dS d }~ww xY wdS )Nz.Exception when deleting docs from Rockset: %s
FT)r   r   r   r   )r+   r8   r<   r   s       r,   deletezRockset.delete  sp    	{c"""" 	 	 	LLJANNN55555	 ts    
AAAc                :   K   t          d | j        |fi | d {V S r0   )r   r   )r+   r8   r<   s      r,   adeletezRockset.adelete  s6       %T4;FFvFFFFFFFFFr2   )r   )r   r   r   r   r   r   r   r   r   r   r   r   )r.   r   )NNr3   )r4   r5   r6   r7   r8   r9   r:   r;   r<   r   r.   r=   )NNrK   rK   rK   Nr3   )r4   r=   rL   r   r6   r7   r   r   r   r   r   r   r   r   r8   r9   r:   r;   r<   r   r.   r   )r`   r   ra   r;   rb   r^   rc   rd   r<   r   r.   re   )r`   r   ra   r;   rb   r^   rc   rd   r<   r   r.   rj   )rL   rn   ra   r;   rb   r^   rc   rd   r<   r   r.   rj   )rL   rn   ra   r;   rb   r^   rc   rd   r<   r   r.   re   )r_   r   r   )r`   r   ra   r;   r   r;   r   r   rc   rd   r<   r   r.   rj   )r_   NT)r   rn   rb   r^   ra   r;   rc   rd   rv   r   r.   r   )rE   r   r.   r=   )r8   r=   r.   r   r0   )r8   r9   r<   r   r.   r   )rZ   r[   r\   __doc__r-   propertyr   rJ   classmethodrO   r   r^   rR   ri   rm   rl   rg   r   r}   rB   r   r   r   r]   r2   r,   r   r      sI        H #. . . . .`       X  +/#'& & & & &P 
 +/!#'    [8	 	 	 	 	4 	 	 	 *:*E#'
 
 
 
 
H *:*E#'
 
 
 
 
, *:*E#'3 3 3 3 3& *:*E#'2 2 2 2 2n  4; $(4; 4; 4; 4; 4; 4;x #'#'    2C C C C
 
 
 
 	 	 	 	 	 *.G G G G G G Gr2   r   )
__future__r   loggingcopyr   enumr   typingr   r   r   r	   r
   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.runnablesr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   	getLoggerrZ   r   r   r]   r2   r,   <module>r      s.   " " " " " "              7 7 7 7 7 7 7 7 7 7 7 7 7 7     - - - - - - 0 0 0 0 0 0 4 4 4 4 4 4 3 3 3 3 3 3 M M M M M M		8	$	$OG OG OG OG OGk OG OG OG OG OGr2   