
    Ng                         d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ  G d dee          Z G d de          ZdS )    )Enum)AnyDictListOptionalUnionN)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)
get_fields)
ConfigDict)maximal_marginal_relevancec                       e Zd ZdZdZdZdS )
SearchTypez-Enumerator of the types of search to perform.
similaritymmrN)__name__
__module____qualname____doc__r   r        c/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/retrievers/docarray.pyr   r      s        77J
CCCr   r   c            
       ~   e Zd ZU dZdZeed<   eed<   eed<   eed<   e	j
        Ze	ed<   dZeed	<   dZee         ed
<    ed          Zdededee         fdZdej        d	edeeeeef         ef                  fdZdej        dee         fdZdej        dee         fdZdeeeef         ef         defdZdS )DocArrayRetrievera  `DocArray Document Indices` retriever.

    Currently, it supports 5 backends:
    InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
    ElasticDocIndex, and WeaviateDocumentIndex.

    Args:
        index: One of the above-mentioned index instances
        embeddings: Embedding model to represent text as vectors
        search_field: Field to consider for searching in the documents.
            Should be an embedding/vector/tensor.
        content_field: Field that represents the main content in your document schema.
            Will be used as a `page_content`. Everything else will go into `metadata`.
        search_type: Type of search to perform (similarity / mmr)
        filters: Filters applied for document retrieval.
        top_k: Number of documents to return
    Nindex
embeddingssearch_fieldcontent_fieldsearch_type   top_kfiltersT)arbitrary_types_allowedqueryrun_managerreturnc                :   t          j        | j                            |                    }| j        t
          j        k    r|                     |          }nC| j        t
          j        k    r| 	                    |          }nt          d| j         d          |S )zGet documents relevant for a query.

        Args:
            query: string to find relevant documents for

        Returns:
            List of relevant documents
        zSearch type z5 does not exist. Choose either 'similarity' or 'mmr'.)nparrayr   embed_queryr!   r   r   _similarity_searchr   _mmr_search
ValueError)selfr&   r'   	query_embresultss        r   _get_relevant_documentsz)DocArrayRetriever._get_relevant_documents5   s     HT_88??@@	z444--i88GG//&&y11GG8t/ 8 8 8  
 r   r1   c                 $   ddl m}m} i }| j        }t	          | j        |          r| j        |d<   d}n*t	          | j        |          r| j        |d<   n
| j        |d<   | j        r | j                                                            ||          j	        di |
                    |          }| j                            |          }t          |d	          r|j        }|d
|         }n"| j                            |||          j        }|S )a  
        Perform a search using the query embedding and return top_k documents.

        Args:
            query_emb: Query represented as an embedding
            top_k: Number of documents to return

        Returns:
            A list of top_k documents matching the query
        r   )ElasticDocIndexWeaviateDocumentIndexwhere_filter r&   filter_query)r&   r   )limit	documentsN)r&   r   r:   r   )docarray.indexr5   r6   r   
isinstancer   r$   build_queryfindfilterbuildexecute_queryhasattrr;   )	r0   r1   r#   r5   r6   filter_argsr   r&   docss	            r   _searchzDocArrayRetriever._searchQ   sZ    	JIIIIIII(dj"788 	7*.,K'LL
O44 	7#'<K  *.,K'< 	
&&((#,    	' ' &	' '
 U##  :++E22Dt[)) &~<DD:??l% #    r   c                 \                           | j                  } fd|D             }|S )z
        Perform a similarity search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of documents most similar to the query
        r1   r#   c                 :    g | ]}                     |          S r   _docarray_to_langchain_doc.0docr0   s     r   
<listcomp>z8DocArrayRetriever._similarity_search.<locals>.<listcomp>   s'    HHHC422377HHHr   )rF   r#   )r0   r1   rE   r2   s   `   r   r-   z$DocArrayRetriever._similarity_search   s:     ||itz|BBHHHH4HHHr   c                                            |d          t          | fdD              j                  } fd|D             }|S )z
        Perform a maximal marginal relevance (mmr) search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of diverse documents related to the query
           rH   c                 ~    g | ]9}t          |t                    r|j                 nt          |j                  :S r   )r=   dictr   getattrrL   s     r   rO   z1DocArrayRetriever._mmr_search.<locals>.<listcomp>   sW         c4((5D%&&S$"344  r   )kc                 F    g | ]}                     |                   S r   rJ   )rM   idxrE   r0   s     r   rO   z1DocArrayRetriever._mmr_search.<locals>.<listcomp>   s+    VVV#42249==VVVr   )rF   r   r#   )r0   r1   mmr_selectedr2   rE   s   `   @r   r.   zDocArrayRetriever._mmr_search   s     ||ir|::1     	   j	
 	
 	
 WVVVVVVVr   rN   c                     t          |t                    r|                                nt          |          }| j        |vrt          d| j         d          t          t          |t                    r|| j                 nt          || j                            }|D ]l}t          |t                    r||         nt          ||          }t          |t          t          t          t          f          r|| j        k    r
||j        |<   m|S )a;  
        Convert a DocArray document (which also might be a dict)
        to a langchain document format.

        DocArray document can contain arbitrary fields, so the mapping is done
        in the following way:

        page_content <-> content_field
        metadata <-> all other fields excluding
            tensors and embeddings (so float, int, string)

        Args:
            doc: DocArray document

        Returns:
            Document in langchain format

        Raises:
            ValueError: If the document doesn't contain the content field
        z.Document does not contain the content field - .)page_content)r=   rS   keysr   r    r/   r
   rT   strintfloatboolmetadata)r0   rN   fieldslc_docnamevalues         r   rK   z,DocArrayRetriever._docarray_to_langchain_doc   s   ,  *#t44I*S//V++VASVVV   #t$$2T/00d011
 
 
  	. 	.D!+C!6!6NCIIGC<N<NE53UD"9::.D...(-%r   )r   r   r   r   r   r   __annotations__r   r]   r   r   r!   r#   r^   r$   r   r   model_configr	   r   r
   r3   r*   ndarrayr   r   rF   r-   r.   rK   r   r   r   r   r      s         $ E3(3K333E3NNN!GXc]!!!: $  L 4	
 
h   8,,,/,	eDcNC'(	), , , ,\BJ 4>    RZ DN    2*eDcNC4G.H *X * * * * * *r   r   )enumr   typingr   r   r   r   r   numpyr*   langchain_core.callbacksr	   langchain_core.documentsr
   langchain_core.embeddingsr   langchain_core.retrieversr   langchain_core.utils.pydanticr   pydanticr   &langchain_community.vectorstores.utilsr   r]   r   r   r   r   r   <module>rs      s5         3 3 3 3 3 3 3 3 3 3 3 3 3 3     C C C C C C - - - - - - 0 0 0 0 0 0 3 3 3 3 3 3 4 4 4 4 4 4       M M M M M M    d   z z z z z z z z z zr   