
    Ng F                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z   e!g d          Z"dZ#ddZ$ G d de          Z%dS )    )annotationsN)ConfigParser)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)Docstore)InMemoryDocstore)maximal_marginal_relevance)angular	euclidean	manhattanhammingdotr   returnr   c                      t          d          S )z1Import annoy if available, otherwise raise error.annoyr        b/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/annoy.pydependable_annoy_importr       s       r   c                  <   e Zd ZdZdHdZedId            Z	 dJdKdZdLdZ	 dMdNd$Z		 dMdOd&Z
	 dMdPd(Z	 dMdQd*Z	 dMdRd+Z	 dMdSd,Z	 	 	 dTdUd2Z	 	 	 dTdVd3Zeded4dfdWd:            Zeded4dfdXd;            Zeded4dfdYd>            ZdZd[dDZed?dEd\dG            ZdS )]Annoya  `Annoy` vector store.

    To use, you should have the ``annoy`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Annoy
            db = Annoy(embedding_function, index, docstore, index_to_docstore_id)

    embedding_functionr   indexr   metricstrdocstorer   index_to_docstore_idDict[int, str]c                L    || _         || _        || _        || _        || _        dS )z%Initialize with necessary components.N)r#   r$   r%   r'   r(   )selfr#   r$   r%   r'   r(   s         r   __init__zAnnoy.__init__*   s/     #5
 $8!!!r   r   Optional[Embeddings]c                    d S Nr   )r+   s    r   
embeddingszAnnoy.embeddings9   s	     tr   NtextsIterable[str]	metadatasOptional[List[dict]]kwargs	List[str]c                     t          d          )Nz=Annoy does not allow to add new data once the index is build.)NotImplementedError)r+   r1   r3   r5   s       r   	add_textszAnnoy.add_texts>   s     "K
 
 	
r   idxs	List[int]distsList[float]List[Tuple[Document, float]]c                   g }t          ||          D ]m\  }}| j        |         }| j                            |          }t	          |t
                    st          d| d|           |                    ||f           n|S )a  Turns annoy results into a list of documents and scores.

        Args:
            idxs: List of indices of the documents in the index.
            dists: List of distances of the documents in the index.
        Returns:
            List of Documents and scores.
        Could not find document for id , got )zipr(   r'   search
isinstancer   
ValueErrorappend)r+   r:   r<   docsidxdist_iddocs           r   process_index_resultszAnnoy.process_index_resultsH   s     T5)) 	% 	%IC+C0C-&&s++Cc8,, U !S3!S!Sc!S!STTTKKd$$$$r      	embeddingkintsearch_kc                p    | j                             |||d          \  }}|                     ||          S a}  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided
        Returns:
            List of Documents most similar to the query and score for each
        TrR   include_distances)r$   get_nns_by_vectorrL   )r+   rO   rP   rR   r:   r<   s         r   &similarity_search_with_score_by_vectorz,Annoy.similarity_search_with_score_by_vector\   sE     j22q8t 3 
 
e ))$666r   docstore_indexc                p    | j                             |||d          \  }}|                     ||          S rT   )r$   get_nns_by_itemrL   )r+   rY   rP   rR   r:   r<   s         r   %similarity_search_with_score_by_indexz+Annoy.similarity_search_with_score_by_indexn   sE     j00AD 1 
 
e ))$666r   queryc                ^    |                      |          }|                     |||          }|S )a~  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query and score for each
        )r#   rX   )r+   r]   rP   rR   rO   rG   s         r   similarity_search_with_scorez"Annoy.similarity_search_with_score   s3     ++E22	::9aRRr   List[Document]c                H    |                      |||          }d |D             S )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                    g | ]\  }}|S r   r   .0rK   _s      r   
<listcomp>z5Annoy.similarity_search_by_vector.<locals>.<listcomp>       222Q222r   )rX   )r+   rO   rP   rR   r5   docs_and_scoress         r   similarity_search_by_vectorz!Annoy.similarity_search_by_vector   s6     EEq(
 
 32/2222r   c                H    |                      |||          }d |D             S )az  Return docs most similar to docstore_index.

        Args:
            docstore_index: Index of document in docstore
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                    g | ]\  }}|S r   r   rc   s      r   rf   z4Annoy.similarity_search_by_index.<locals>.<listcomp>   rg   r   )r\   )r+   rY   rP   rR   r5   rh   s         r   similarity_search_by_indexz Annoy.similarity_search_by_index   s6     DDAx
 
 32/2222r   c                H    |                      |||          }d |D             S )al  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query.
        c                    g | ]\  }}|S r   r   rc   s      r   rf   z+Annoy.similarity_search.<locals>.<listcomp>   rg   r   )r_   )r+   r]   rP   rR   r5   rh   s         r   similarity_searchzAnnoy.similarity_search   s/     ;;E1hOO22/2222r            ?fetch_klambda_multfloatc                     j                             ||dd           fdD             }t          t          j        |gt          j                  |||          }fd|D             }g }	|D ]h}
 j        |
         } j                            |          }t          |t                    st          d| d	|           |	                    |           i|	S )
a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            k: Number of Documents to return. Defaults to 4.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.

        Returns:
            List of Documents selected by maximal marginal relevance.
        rN   FrU   c                D    g | ]}j                             |          S r   )r$   get_item_vector)rd   ir+   s     r   rf   zAAnnoy.max_marginal_relevance_search_by_vector.<locals>.<listcomp>   s)    BBBdj0033BBBr   )dtype)rP   rs   c                ,    g | ]}|d k    |         S )rN   r   )rd   rx   r:   s     r   rf   zAAnnoy.max_marginal_relevance_search_by_vector.<locals>.<listcomp>   s"    EEEQ"WWDGWWWr   r@   rA   )r$   rW   r   nparrayfloat32r(   r'   rC   rD   r   rE   rF   )r+   rO   rP   rr   rs   r5   r0   mmr_selectedselected_indicesrG   rx   rJ   rK   r:   s   `            @r   'max_marginal_relevance_search_by_vectorz-Annoy.max_marginal_relevance_search_by_vector   s   2 z++wu , 
 
 CBBBTBBB
1Hi[
333#	
 
 
 FEEE\EEE! 	 	A+A.C-&&s++Cc8,, U !S3!S!Sc!S!STTTKKr   c                b    |                      |          }|                     ||||          }|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )rs   )r#   r   )r+   r]   rP   rr   rs   r5   rO   rG   s           r   max_marginal_relevance_searchz#Annoy.max_marginal_relevance_search   sA    0 ++E22	;;q'{ < 
 
 r   d   r0   List[List[float]]r   treesn_jobsc                   |t           vr't          d| dt          t                                t          d          }	|st          d          t	          |d                   }
|	                    |
|          }t          |          D ]\  }}|                    ||           |                    ||           g }t          |          D ]5\  }}|r||         ni }|	                    t          ||                     6d	 t          t	          |                    D             t          fd
t          |          D                       } | |j        |||          S )NzUnsupported distance metric: z. Expected one of r   z/embeddings must be provided to build AnnoyIndexr   r%   )r   )page_contentmetadatac                P    i | ]#}|t          t          j                              $S r   )r&   uuiduuid4)rd   rx   s     r   
<dictcomp>z Annoy.__from.<locals>.<dictcomp>6  s(    KKKq#djll++KKKr   c                (    i | ]\  }}|         |S r   r   )rd   rx   rK   index_to_ids      r   r   z Annoy.__from.<locals>.<dictcomp>8  s#    DDDVQ[^SDDDr   )INDEX_METRICSrE   listr   len
AnnoyIndex	enumerateadd_itembuildrF   r   ranger   embed_query)clsr1   r0   rO   r3   r%   r   r   r5   r   fr$   rx   emb	documentstextr   r'   r   s                     @r   __fromzAnnoy.__from  s    &&=F = ='+M':':= =   W%% 	PNOOO
1  6 22
++ 	# 	#FAsNN1c""""E&)))	 '' 	M 	MGAt'08y||bHX4(KKKLLLLKKU3y>>5J5JKKK#DDDDy/C/CDDD
 
 s9(%;OOOr   c           	     T    |                     |          } | j        |||||||fi |S )a  Construct Annoy wrapper from raw documents.

        Args:
            texts: List of documents to index.
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                index = Annoy.from_texts(texts, embeddings)
        )embed_documents_Annoy__from)	r   r1   rO   r3   r%   r   r   r5   r0   s	            r   
from_textszAnnoy.from_texts<  sJ    H ..u55
sz:y)VUF
 
NT
 
 	
r   text_embeddingsList[Tuple[str, List[float]]]c           	     Z    d |D             }d |D             }	 | j         ||	|||||fi |S )a  Construct Annoy wrapper from embeddings.

        Args:
            text_embeddings: List of tuples of (text, embedding)
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1

        This is a user friendly interface that:
            1. Creates an in memory docstore with provided embeddings
            2. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                db = Annoy.from_embeddings(text_embedding_pairs, embeddings)
        c                    g | ]
}|d          S )r   r   rd   ts     r   rf   z)Annoy.from_embeddings.<locals>.<listcomp>  s    ///!1///r   c                    g | ]
}|d          S )   r   r   s     r   rf   z)Annoy.from_embeddings.<locals>.<listcomp>  s    444qad444r   )r   )
r   r   rO   r3   r%   r   r   r5   r1   r0   s
             r   from_embeddingszAnnoy.from_embeddingse  s]    J 0////44O444
sz:y)VUF
 
NT
 
 	
r   Ffolder_pathprefaultboolNonec                   t          |          }t          j        |d           t                      }| j        j        | j        d|d<   | j                            t          |dz            |           t          |dz  d          5 }t          j        | j        | j        |f|           d	d	d	           d	S # 1 swxY w Y   d	S )
a  Save Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            prefault: Whether to pre-load the index into memory.
        T)exist_ok)r   r%   ANNOYindex.annoy)r   	index.pklwbN)r   osmakedirsr   r$   r   r%   saver&   openpickledumpr'   r(   )r+   r   r   pathconfig_objectfiles         r   
save_localzAnnoy.save_local  s    K  
D4(((($k"
 "
g 	
D=011HEEE$$d++ 	YtK(A=QSWXXX	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Ys   
#B::B>B>)allow_dangerous_deserializationr   c                  |st          d          t          |          }t          d          }t          |dz  d          5 }t	          j        |          \  }}}	ddd           n# 1 swxY w Y   t          |	d         d                   }
|	d         d         }|                    |
|	          }|                    t          |d
z                        | |j	        ||||          S )aR  Load Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries.
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbNr   r   r%   r   r   )
rE   r   r   r   r   loadrQ   r   r&   r   )r   r   r0   r   r   r   r   r'   r(   r   r   r%   r$   s                r   
load_localzAnnoy.load_local  sJ   ( / 		"   K  W%%$$d++ 	t  	$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 g&s+,,w'1  6 22

3tm+,,---s"E68=Q
 
 	
s   A((A,/A,)
r#   r   r$   r   r%   r&   r'   r   r(   r)   )r   r-   r/   )r1   r2   r3   r4   r5   r   r   r6   )r:   r;   r<   r=   r   r>   )rM   rN   )rO   r=   rP   rQ   rR   rQ   r   r>   )rY   rQ   rP   rQ   rR   rQ   r   r>   )r]   r&   rP   rQ   rR   rQ   r   r>   )
rO   r=   rP   rQ   rR   rQ   r5   r   r   r`   )
rY   rQ   rP   rQ   rR   rQ   r5   r   r   r`   )
r]   r&   rP   rQ   rR   rQ   r5   r   r   r`   )rM   rp   rq   )rO   r=   rP   rQ   rr   rQ   rs   rt   r5   r   r   r`   )r]   r&   rP   rQ   rr   rQ   rs   rt   r5   r   r   r`   )r1   r6   r0   r   rO   r   r3   r4   r%   r&   r   rQ   r   rQ   r5   r   r   r"   )r1   r6   rO   r   r3   r4   r%   r&   r   rQ   r   rQ   r5   r   r   r"   )r   r   rO   r   r3   r4   r%   r&   r   rQ   r   rQ   r5   r   r   r"   )F)r   r&   r   r   r   r   )r   r&   r0   r   r   r   r   r"   )__name__
__module____qualname____doc__r,   propertyr0   r9   rL   rX   r\   r_   ri   rl   ro   r   r   classmethodDEFAULT_METRICr   r   r   r   r   r   r   r   r"   r"      sw       
 
9 9 9 9    X +/
 
 
 
 
   * CE7 7 7 7 7& @B7 7 7 7 7& 79    & CE3 3 3 3 3( @B3 3 3 3 3( 793 3 3 3 3(  - - - - -d      <  +/$#P #P #P #P [#PJ 
 +/$&
 &
 &
 &
 [&
P 
 +/$)
 )
 )
 )
 [)
VY Y Y Y Y(  166
 6
 6
 6
 6
 [6
 6
 6
r   r"   )r   r   )&
__future__r   r   r   r   configparserr   pathlibr   typingr   r   r   r	   r
   r   r   numpyr{   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   !langchain_community.docstore.baser   &langchain_community.docstore.in_memoryr   &langchain_community.vectorstores.utilsr   	frozensetr   r   r    r"   r   r   r   <module>r      s   " " " " " " 				   % % % % % %       G G G G G G G G G G G G G G G G G G     - - - - - - 0 0 0 0 0 0 - - - - - - 3 3 3 3 3 3 6 6 6 6 6 6 C C C C C C M M M M M M	QQQRR! ! ! !

 
 
 
 
K 
 
 
 
 
r   