
    Ngm0                       d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ erd dlmZ d dlmZ  G d	 d
ee          Z edeeef                   Z ej        e          ZdZ G d de          ZdS )    )annotationsN)Enum)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTypeVarUnion)Document)VectorStore)
Embeddings)
Collectionc                  "    e Zd ZdZdZ	 dZ	 dZdS )DocumentDBSimilarityTypez)DocumentDB Similarity Type as enumerator.cosine
dotProduct	euclideanN)__name__
__module____qualname____doc__COSDOTEUC     g/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/documentdb.pyr   r      s+        33
C
C
Cr   r   DocumentDBDocumentType)bound   c                      e Zd ZdZdddddGdZedHd            ZdIdZedJd            Z	dKdZ
dLdZdej        ddfdMd%Z	 dNdOd,ZdPd/Ze	 	 dQdRd2            ZdNdSd6ZdNdTd9Z	 	 	 dUdVdCZ	 	 dWd&dDdXdFZd&S )YDocumentDBVectorSearcha  `Amazon DocumentDB (with MongoDB compatibility)` vector store.
    Please refer to the official Vector Search documentation for more details:
    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string and credentials associated with a DocumentDB cluster

    Example:
        . code-block:: python

            from langchain_community.vectorstores import DocumentDBVectorSearch
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from pymongo import MongoClient

            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
            collection = mongo_client["<db_name>"]["<collection_name>"]
            embeddings = OpenAIEmbeddings()
            vectorstore = DocumentDBVectorSearch(collection, embeddings)
    vectorSearchIndextextContentvectorContent)
index_nametext_keyembedding_key
collection"Collection[DocumentDBDocumentType]	embeddingr   r)   strr*   r+   c               n    || _         || _        || _        || _        || _        t
          j        | _        dS )a  Constructor for DocumentDBVectorSearch

        Args:
            collection: MongoDB collection to add the texts to.
            embedding: Text embedding model to use.
            index_name: Name of the Vector Search index.
            text_key: MongoDB field that will contain the text
                for each document.
            embedding_key: MongoDB field that will contain the embedding
                for each document.
        N)_collection
_embedding_index_name	_text_key_embedding_keyr   r   _similarity_type)selfr,   r.   r)   r*   r+   s         r    __init__zDocumentDBVectorSearch.__init__B   s;    ( &#%!+ 8 <r   returnc                    | j         S N)r2   r7   s    r    
embeddingsz!DocumentDBVectorSearch.embeddings]   s
    r   c                    | j         S )zUReturns the index name

        Returns:
            Returns the index name

        )r3   r<   s    r    get_index_namez%DocumentDBVectorSearch.get_index_namea   s     r   connection_string	namespacekwargsr   c                    	 ddl m} n# t          $ r t          d          w xY w ||          }|                    d          \  }}||         |         }	 | |	|fi |S )a  Creates an Instance of DocumentDBVectorSearch from a Connection String

        Args:
            connection_string: The DocumentDB cluster endpoint connection string
            namespace: The namespace (database.collection)
            embedding: The embedding utility
            **kwargs: Dynamic keyword arguments

        Returns:
            an instance of the vector store

        r   )MongoClientzGCould not import pymongo, please install it with `pip install pymongo`..)pymongorD   ImportErrorsplit)
clsr@   rA   r.   rB   rD   clientdb_namecollection_namer,   s
             r    from_connection_stringz-DocumentDBVectorSearch.from_connection_stringj   s    (	+++++++ 	 	 	)  	
 *k*;<<#,??3#7#7 G__5
s:y33F333s   	 #boolc                    | j                                         }| j        }|D ] }|                    d          }||k    r dS !dS )zVerifies if the specified index name during instance
            construction exists on the collection

        Returns:
          Returns True on success and False if no such index exists
            on the collection
        nameTF)r1   list_indexesr3   pop)r7   cursorr)   rescurrent_index_names        r    index_existsz#DocumentDBVectorSearch.index_exists   s]     !..00%
 	 	C!$!Z//tt 0 ur   Nonec                p    |                                  r!| j                            | j                   dS dS )zEDeletes the index specified during instance construction if it existsN)rV   r1   
drop_indexr3   r<   s    r    delete_indexz#DocumentDBVectorSearch.delete_index   s@     	:''(899999	: 	:r   i      @   
dimensionsint
similarityr   mef_constructiondict[str, Any]c           	         || _         | j        j        | j        | j        did||||ddgd}| j        j        }|                    |          }|S )a  Creates an index using the index name specified at
            instance construction

        Args:
            dimensions: Number of dimensions for vector similarity.
                The maximum number of supported dimensions is 2000

            similarity: Similarity algorithm to use with the HNSW index.
                 Possible options are:
                    - DocumentDBSimilarityType.COS (cosine distance),
                    - DocumentDBSimilarityType.EUC (Euclidean distance), and
                    - DocumentDBSimilarityType.DOT (dot product).

            m: Specifies the max number of connections for an HNSW index.
                Large impact on memory consumption.

            ef_construction: Specifies the size of the dynamic candidate list
                for constructing the graph for HNSW index. Higher values lead
                to more accurate results but slower indexing speed.


        Returns:
            An object describing the created index

        vectorhnsw)typer_   r]   r`   efConstruction)rP   keyvectorOptions)createIndexesindexes)r6   r1   rP   r3   r5   databasecommand)r7   r]   r_   r`   ra   create_index_commandscurrent_databasecreate_index_responsess           r    create_indexz#DocumentDBVectorSearch.create_index   s    @ !+ "-2 !, /: &&0&0*9& &
 
!
 !
$  +4 2B1I1I!2
 2
 &%r   NtextsIterable[str]	metadatasOptional[List[Dict[str, Any]]]r
   c                   |                     dt                    }|pd |D             }g }g }g }t          t          ||                    D ]k\  }	\  }
}|                    |
           |                    |           |	dz   |z  dk    r-|                    |                     ||                     g }g }l|r)|                    |                     ||                     |S )N
batch_sizec              3     K   | ]}i V  d S r;   r   ).0_s     r    	<genexpr>z3DocumentDBVectorSearch.add_texts.<locals>.<genexpr>   s"      :M:M!2:M:M:M:M:M:Mr      r   )getDEFAULT_INSERT_BATCH_SIZE	enumeratezipappendextend_insert_texts)r7   rr   rt   rB   rw   
_metadatastexts_batchmetadatas_batch
result_idsitextmetadatas               r    	add_textsz DocumentDBVectorSearch.add_texts   s	    ZZ.GHH
-6-M:M:Mu:M:M:M

#,S
-C-C#D#D 	% 	%Aht$$$""8,,,A#q((!!$"4"4[/"R"RSSS "$ 	Pd00oNNOOOr   	List[str]List[Dict[str, Any]]c                     |sg S  j                             |          } fdt          |||          D             } j                            |          }|j        S )zUsed to Load Documents into the collection

        Args:
            texts: The list of documents strings to load
            metadatas: The list of metadata objects associated with each document

        Returns:

        c                <    g | ]\  }}}j         |j        |i|S r   )r4   r5   )ry   tr`   r.   r7   s       r    
<listcomp>z8DocumentDBVectorSearch._insert_texts.<locals>.<listcomp>  sB     
 
 
1i ^Q 3YD!D
 
 
r   )r2   embed_documentsr   r1   insert_manyinserted_ids)r7   rr   rt   r=   	to_insertinsert_results   `     r    r   z$DocumentDBVectorSearch._insert_texts   s~      	I _44U;;

 
 
 
#&ui#D#D
 
 
	
 (44Y??))r   Optional[List[dict]],Optional[Collection[DocumentDBDocumentType]]c                j    |t          d           | ||fi |}|                    ||           |S )Nz*Must provide 'collection' named parameter.)rt   )
ValueErrorr   )rI   rr   r.   rt   r,   rB   vectorstores          r    
from_textsz!DocumentDBVectorSearch.from_texts  sQ     IJJJc*i::6::ey999r   idsOptional[List[str]]Optional[bool]c                \    |t          d          |D ]}|                     |           dS )Nz#No document ids provided to delete.T)r   delete_document_by_id)r7   r   rB   document_ids       r    deletezDocumentDBVectorSearch.delete  sA    ;BCCC 	4 	4K&&{3333tr   r   Optional[str]c                    	 ddl m} n"# t          $ r}t          d          |d}~ww xY w|t          d          | j                            d ||          i           dS )zjRemoves a Specific Document by Id

        Args:
            document_id: The document identifier
        r   )ObjectIdz>Unable to import bson, please install with `pip install bson`.Nz"No document id provided to delete._id)bson.objectidr   rG   r   r1   
delete_one)r7   r   r   es       r    r   z,DocumentDBVectorSearch.delete_document_by_id&  s    	....... 	 	 	P 	 ABBB##UHH[,A,A$BCCCCCs   	 
(#(   (   r=   List[float]k	ef_searchfilterOptional[Dict[str, Any]]List[Document]c           	         |si }d|idd|| j         | j        ||diig}| j                            |          }g }|D ]@}|                    | j                  }	|                    t          |	|                     A|S )a   Returns a list of documents.

        Args:
            embeddings: The query vector
            k: the number of documents to return
            ef_search: Specifies the size of the dynamic candidate list
                that HNSW index uses during search. A higher value of
                efSearch provides better recall at cost of speed.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
        Returns:
            A list of documents closest to the query vector
        z$matchz$searchvectorSearch)rd   pathr_   r   efSearch)page_contentr   )r5   r6   r1   	aggregaterR   r4   r   r   )
r7   r=   r   r   r   pipelinerS   docsrT   r   s
             r     _similarity_search_without_scorez7DocumentDBVectorSearch._similarity_search_without_score7  s    *  	Fv"", $ 3&*&;$-% %
*
 !++H55 	C 	CC774>**DKKdSAAABBBBr   )r   queryc                   | j                             |          }|                     ||||          }d |D             S )N)r=   r   r   r   c                    g | ]}|S r   r   )ry   docs     r    r   z<DocumentDBVectorSearch.similarity_search.<locals>.<listcomp>t  s    $$$$$$r   )r2   embed_queryr   )r7   r   r   r   r   rB   r=   r   s           r    similarity_searchz(DocumentDBVectorSearch.similarity_searchg  sR     _0077
44!Q)F 5 
 
 %$t$$$$r   )
r,   r-   r.   r   r)   r/   r*   r/   r+   r/   )r9   r   )r9   r/   )
r@   r/   rA   r/   r.   r   rB   r   r9   r%   )r9   rN   )r9   rW   )
r]   r^   r_   r   r`   r^   ra   r^   r9   rb   r;   )rr   rs   rt   ru   rB   r   r9   r
   )rr   r   rt   r   r9   r
   )NN)rr   r   r.   r   rt   r   r,   r   rB   r   r9   r%   )r   r   rB   r   r9   r   )r   r   r9   rW   )r   r   N)
r=   r   r   r^   r   r^   r   r   r9   r   )r   r   )r   r/   r   r^   r   r^   r   r   rB   r   r9   r   )r   r   r   r   r8   propertyr=   r?   classmethodrM   rV   rZ   r   r   rq   r   r   r   r   r   r   r   r   r   r    r%   r%   ,   s        4 .%,= = = = = =6    X        4 4 4 [4>   $: : : : /G/K!<& <& <& <& <&B 59    ,* * * *0 
 +/CG    [    D D D D D( +/. . . . .f 	% ,0% % % % % % % %r   r%   )
__future__r   loggingenumr   typingr   r   r   r   r	   r
   r   r   r   langchain_core.documentsr   langchain_core.vectorstoresr   langchain_core.embeddingsr   pymongo.collectionr   r/   r   r!   	getLoggerr   loggerr~   r%   r   r   r    <module>r      s   " " " " " "       
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 . - - - - - 3 3 3 3 3 3 .444444------    sD    !!9c3hPPP 		8	$	$ H% H% H% H% H%[ H% H% H% H% H%r   