
    NgW`                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	ZddZddZ G d de          ZdS )    )annotationsN)AnyCallableDictIterableListOptionalType)Document)
Embeddingsguard_import)VectorStore)maximal_marginal_relevance   returnr   c                      t          d          S )zImport lancedb package.lancedbr        d/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/lancedb.pyimport_lancedbr      s    	"""r   filterDict[str, str]strc                d    d                     d |                                 D                       S )z2Converts a dict filter to a LanceDB filter string.z AND c                $    g | ]\  }}| d | dS )z = ''r   ).0kvs      r   
<listcomp>z#to_lance_filter.<locals>.<listcomp>   s(    CCCDAqA1CCCr   )joinitems)r   s    r   to_lance_filterr%      s+    <<CCFLLNNCCCDDDr   c                  f   e Zd ZdZdddddddddddd	ddefd`dZdadbd%Zedcd&            Z	 	 ddded/Z		 dfdgd3Z
	 	 	 	 	 	 	 dhdid?ZdjdAZ	 	 dddkdCZ	 	 	 dldmdGZdndIZ	 	 	 dldodLZ	 	 	 dldodMZ	 	 dddpdNZ	 	 	 	 dqdrdQZ	 	 	 	 dsdtdWZ	 	 	 	 dsdudXZe	 	 	 	 	 	 	 	 	 	 	 	 dvdwd\            Z	 	 	 	 	 dxdyd_ZdS )zLanceDBay  `LanceDB` vector store.

    To use, you should have ``lancedb`` python package installed.
    You can install it with ``pip install lancedb``.

    Args:
        connection: LanceDB connection to use. If not provided, a new connection
                    will be created.
        embedding: Embedding to use for the vectorstore.
        vector_key: Key to use for the vector in the database. Defaults to ``vector``.
        id_key: Key to use for the id in the database. Defaults to ``id``.
        text_key: Key to use for the text in the database. Defaults to ``text``.
        table_name: Name of the table to use. Defaults to ``vectorstore``.
        api_key: API key to use for LanceDB cloud database.
        region: Region to use for LanceDB cloud database.
        mode: Mode to use for adding data to the table. Valid values are
              ``append`` and ``overwrite``. Defaults to ``overwrite``.



    Example:
        .. code-block:: python
            vectorstore = LanceDB(uri='/lancedb', embedding_function)
            vectorstore.add_texts(['text1', 'text2'])
            result = vectorstore.similarity_search('text1')
    Nz/tmp/lancedbvectoridtextvectorstore	overwritel2
connectionOptional[Any]	embeddingOptional[Embeddings]uriOptional[str]
vector_keyid_keytext_key
table_nameapi_keyregionmodetabledistancererankerrelevance_score_fn"Optional[Callable[[float], float]]limitintc                    t          d          }|| _        || _        || _        || _        |dk    r|pt          j        d          nd| _        |	| _        |
| _	        || _
        || _        || _        d| _        t          ||j        j                  r|| _        n|d| _        nt%          d          t          |t&                    r+| j        $|                    d          rt%          d          | j        t%          d          t          ||j        j                  r|| _        nt          |t&          |j        j        f          rt%          d	          | j        |                    |          | _        nt          |t&                    rk|                    d          r(|                    || j        | j        
          | _        n.|                    |          | _        t5          j        d           |r	 t          ||j        j        |j        j        j        f          sJ || _        tA          |d          r|j!        nd| _"        dS # tF          $ r t%          d          w xY w| $                    |d          | _        dS )z$Initialize with Lance DB vectorstorer    LANCE_API_KEYNz9`reranker` has to be a lancedb.rerankers.Reranker object.zdb://z&API key is required for LanceDB cloud.z#embedding object should be providedzs`connection` has to be a lancedb.db.LanceDBConnection object.                `lancedb.db.LanceTable` is deprecated.)r8   r9   z[api key provided with local uri.                            The data will be stored locallynameremote_tablezj`table` has to be a lancedb.db.LanceTable or 
                    lancedb.remote.table.RemoteTable object.T)set_default)%r   
_embedding_vector_key_id_key	_text_keyosgetenvr8   r9   r:   r<   override_relevance_score_fnr@   
_fts_index
isinstance	rerankersReranker	_reranker
ValueErrorr   
startswithdbLanceDBConnection_connection
LanceTableconnectwarningswarnremoter;   RemoteTable_tablehasattrrE   _table_nameAssertionError	get_table)selfr.   r0   r2   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r@   r   s                    r   __init__zLanceDB.__init__:   s   & y))#%!@G2w<")O"<"<SW	 +=(
h 1 :;; 	%DNN!DNNK   c3 	KDL$8~~g&& K !IJJJ?"BCCCj'*">?? 	)D
S'**?$@AA 	8  
 |##*??3#7#7  c3'' 
~~g.. 	+2??dk ,; , ,(( ,3??3+?+?( =   !GJ17>3G3ST     $")%"8"8LEJJn     "    @   ...FFDKKKs   AI I/Fresultsr   scoreboolr   c                     j         j        }d|v rdn	d|v rdnd |s) fdt          t                              D             S r,|r, fdt          t                              D             S d S d S )N	_distance_relevance_scorec                    g | ]T}t          j                 |                                         d          |                                                   US metadata)page_contentrn   r   rK   as_py)r   idxrf   rd   s     r   r"   z+LanceDB.results_to_docs.<locals>.<listcomp>   sn       
 	 !(!8!=!C!C!E!E$Z05;;==    r   c                    g | ]t}t          j                 |                                         d          |                                                            |                                         fuS rm   rp   )r   rr   rf   	score_colrd   s     r   r"   z+LanceDB.results_to_docs.<locals>.<listcomp>   s     	 	 	  %,T^%<S%A%G%G%I%I!(!4S!9!?!?!A!A   I&s+1133	 	 	r   )schemanamesrangelen)rd   rf   rg   columnsrt   s   ``  @r   results_to_docszLanceDB.results_to_docs   s    .&'!!#II7***IIIE    
 !W..     
	5 
		 	 	 	 	 	 !W..	 	 	 	
	 
	 
	 
	r   c                    | j         S )N)rH   rd   s    r   
embeddingszLanceDB.embeddings   s
    r   textsIterable[str]	metadatasOptional[List[dict]]idsOptional[List[str]]kwargs	List[str]c                8   g }|pd |D             }| j                             t          |                    }t          |          D ]S\  }}||         }	|r||         n	d||         i}
|                    | j        |	| j        ||         | j        |d|
i           T|                                 }|)| j	        
                    | j        |          }|| _        n9| j        |                    || j                   n|                    |           d| _        |S )a  Turn texts into embedding and add it to the database

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            ids: Optional list of ids to associate with the texts.

        Returns:
            List of ids of the added texts.
        c                N    g | ]"}t          t          j                              #S r   r   uuiduuid4r   _s     r   r"   z%LanceDB.add_texts.<locals>.<listcomp>   s&    777Ac$*,,''777r   r)   rn   Ndata)r:   )rH   embed_documentslist	enumerateappendrI   rJ   rK   rc   rX   create_tablera   r_   r8   addr:   rO   )rd   r~   r   r   r   docsr}   rr   r*   r0   rn   tbls               r   	add_textszLanceDB.add_texts   s6   $ 777777_44T%[[AA
"5)) 
	 
	IC"3I)2Hy~~s3x8HHKK$iL#c(ND	    nn;"//0@t/LLCDKK|#49----
r   rE   rG   Optional[bool]c                    ||r|| _         | j         }n
|}n| j         }	 | j                            |          S # t          $ r Y dS w xY w)a  
        Fetches a table object from the database.

        Args:
            name (str, optional): The name of the table to fetch. Defaults to None
                                    and fetches current table object.
            set_default (bool, optional): Sets fetched table as the default table.
                                        Defaults to False.

        Returns:
            Any: The fetched table object.

        Raises:
            ValueError: If the specified table is not found in the database.

        N)ra   rX   
open_table	Exception)rd   rE   rG   _names       r   rc   zLanceDB.get_table   sq    &  #' ($E	#..u555 	 	 	44	s   9 
AA   `   L2col_name
vector_colnum_partitionsOptional[int]num_sub_vectorsindex_cache_sizemetricNonec                    |                      |          }|r|                    |||||           dS |r|                    |           dS t          d          )aO  
        Create a scalar(for non-vector cols) or a vector index on a table.
        Make sure your vector column has enough data before creating an index on it.

        Args:
            vector_col: Provide if you want to create index on a vector column.
            col_name: Provide if you want to create index on a non-vector column.
            metric: Provide the metric to use for vector index. Defaults to 'L2'
                    choice of metrics: 'L2', 'dot', 'cosine'
            num_partitions: Number of partitions to use for the index. Defaults to 256.
            num_sub_vectors: Number of sub-vectors to use for the index. Defaults to 96.
            index_cache_size: Size of the index cache. Defaults to None.
            name: Name of the table to create index on. Defaults to None.

        Returns:
            None
        )r   vector_column_namer   r   r   z%Provide either vector_col or col_nameN)rc   create_indexcreate_scalar_indexrT   )	rd   r   r   r   r   r   r   rE   r   s	            r   r   zLanceDB.create_index  s    6 nnT"" 	F#-- /!1        	F##H-----DEEEr   r   c                    t          |d          5 }t          j        |                                                              d          cddd           S # 1 swxY w Y   dS )z!Get base64 string from image URI.rbzutf-8N)openbase64	b64encodereaddecode)rd   r2   
image_files      r   encode_imagezLanceDB.encode_image.  s    #t__ 	G
#JOO$5$566==gFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	Gs   9AAAurisc                @                                      } fd|D             }|d |D             }d} j        1t           j        d          r j                            |          }nt	          d          g }t          |          D ]Q\  }	}
|r||	         n	d||	         i}|                     j        |
 j        ||	          j	        ||	         d|i           R|) j
                             j        |	          }| _        n|                    |           |S )
as  Run more images through the embeddings and add to the vectorstore.

        Args:
            uris List[str]: File path to the image.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added images.
        c                <    g | ]}                     |           S ))r2   )r   )r   r2   rd   s     r   r"   z&LanceDB.add_images.<locals>.<listcomp>G  s*    @@@CT&&3&//@@@r   Nc                N    g | ]"}t          t          j                              #S r   r   r   s     r   r"   z&LanceDB.add_images.<locals>.<listcomp>J  s&    3333tz||$$333r   embed_image)r   zEembedding object should be provided and must have embed_image method.r)   rn   r   )rc   rH   r`   r   rT   r   r   rI   rJ   rK   rX   r   ra   r_   r   )rd   r   r   r   r   r   	b64_textsr}   r   rr   embrn   s   `           r   
add_imageszLanceDB.add_images3  sW   " nn A@@@4@@@	;33d333C
?&74?M+R+R&44$4??JJW   !*-- 		 		HC)2Hy~~s3x8HHKK$cL#c(NIcN	    ;"//0@t/LLCDKKGGDMMM
r   queryr    r   c                   || j         }|                     |          }t          |t                    rt	          |          }|                    dd          }|                    dd          }|                    d          x}	rX|                    || j                                       |                              |	          	                    ||          }
nD|                    || j                                       |          	                    ||          }
|dk    r"| j
        |
                    | j
        	           |
                                }t          |          d
k    rt          j        d           |S )N	prefilterF
query_typer(   metrics)r   r   )r   hybrid)r=   r   zNo results found for the query.)r@   rc   rP   dictr%   getsearchrI   r   whererS   rerankto_arrowrx   r[   r\   )rd   r   r    r   rE   r   r   r   r   r   lance_queryr   s               r   _queryzLanceDB._queryg  s_    9
AnnT""fd## 	-$V,,FJJ{E22	ZZh77
jj+++7 	

4;K
LLqv33	 K 

4;K
LLqv33 
 !!dn&@777##%%t99>>M;<<<r   Callable[[float], float]c                    | j         r| j         S | j        dk    r| j        S | j        dk    r| j        S | j        dk    r| j        S t          d| j         d          )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        cosiner-   ipzANo supported normalization function for distance metric of type: z=.Consider providing relevance_score_fn to Chroma constructor.)rN   r<   _cosine_relevance_score_fn_euclidean_relevance_score_fn%_max_inner_product_relevance_score_fnrT   r|   s    r   _select_relevance_score_fnz"LanceDB._select_relevance_score_fn  s     + 	433=H$$22]d""55]d""==O15O O O  r   List[float]Optional[Dict[str, str]]c                    || j         } | j        ||f||d|}|                     ||                    dd                    S )zD
        Return documents most similar to the query vector.
        Nr   rE   rg   Frg   )r@   r   rz   pop)rd   r0   r    r   rE   r   ress          r   similarity_search_by_vectorz#LanceDB.similarity_search_by_vector  sY     9
Adk)QKvDKKFKK##Cvzz'5/I/I#JJJr   c                |    || j         }|                                  | j        ||fddi|}fd|D             S )zZ
        Return documents most similar to the query vector with relevance scores.
        Nrg   Tc                J    g | ]\  }}| t          |                    f S r   )float)r   docrg   r>   s      r   r"   zMLanceDB.similarity_search_by_vector_with_relevance_scores.<locals>.<listcomp>  sA     
 
 
8BUS$$U5\\223
 
 
r   )r@   r   r   )rd   r0   r    r   rE   r   docs_and_scoresr>   s          @r   1similarity_search_by_vector_with_relevance_scoresz9LanceDB.similarity_search_by_vector_with_relevance_scores  s|     9
A!<<>>:$:q
 
 $
(.
 

 
 
 
FU
 
 
 	
r   c                   || j         }|                    dd          }|                    dd          }|                    dd          }| j        t          d          |dk    s|d	k    r| j        | j        |                     |          }|                    | j        d
          | _        |d	k    r| j        	                    |          }	|	|f}
n|}
 | j
        |
|f||d|}|                     ||          S t          d          | j        	                    |          }	 | j
        |	|fd|i|}|                     ||          S )zAReturn documents most similar to the query with relevance scores.Nrg   TrE   r   r(   z4search needs an emmbedding function to be specified.ftsr   )replacer   r   z?Full text/ Hybrid search is not supported in LanceDB Cloud yet.r   )r@   r   rH   rT   r8   rO   rc   create_fts_indexrK   embed_queryr   rz   NotImplementedError)rd   r   r    r   r   rg   rE   r   r   r0   r   r   s               r   similarity_search_with_scorez$LanceDB.similarity_search_with_score  s    9
A

7D))zz&$''ZZh77
?"STTT*"8"8|#(?nnT**"%"6"6t~t"6"T"T)) $ ; ;E B BI'/FF"F!dk&!PFPPPP++Cu+===)U   33E::I$+iDD6DVDDC''5'999r   r   List[Document]c           
     .     | j         d|||||dd|}|S )ap  Return documents most similar to the query

        Args:
            query: String to query the vectorstore with.
            k: Number of documents to return.
            filter (Optional[Dict]): Optional filter arguments
                sql_filter(Optional[string]): SQL filter to apply to the query.
                prefilter(Optional[bool]): Whether to apply the filter prior
                                             to the vector search.
        Raises:
            ValueError: If the specified table is not found in the database.

        Returns:
            List of documents most similar to the query.
        F)r   r    rE   r   r   rg   r   )r   )rd   r   r    rE   r   r   r   r   s           r   similarity_searchzLanceDB.similarity_search  s>    0 0d/ 
14Cu
 
PV
 
 
r            ?fetch_klambda_multr   c                    || j         }| j        t          d          | j                            |          }|                     |||||          }|S )a?  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        NzBFor MMR search, you must specify an embedding function oncreation.)r   r   )r@   rH   rT   r   'max_marginal_relevance_search_by_vector)	rd   r   r    r   r   r   r   r0   r   s	            r   max_marginal_relevance_searchz%LanceDB.max_marginal_relevance_search  st    4 9
A?"W   O//66	;;# < 
 
 r   c                *  
  | j         d|||d|}t          t          j        |t          j                  |d                                         |p| j        |          
|                     |          }
fdt          |          D             }	|	S )aH  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        )r   r    r   )dtyper(   )r    r   c                "    g | ]\  }}|v 	|S r   r   )r   irmmr_selecteds      r   r"   zCLanceDB.max_marginal_relevance_search_by_vector.<locals>.<listcomp>h  s'    UUU$!Q1CTCTACTCTCTr   r   )	r   r   nparrayfloat32	to_pylistr@   rz   r   )rd   r0   r    r   r   r   r   rf   
candidatesselected_resultsr   s             @r   r   z/LanceDB.max_marginal_relevance_search_by_vector>  s    6 $+ 

 
 	
 
 2HYbj111H''))o4:#	
 
 
 ))'22
UUUU)J*?*?UUUr   clsType[LanceDB]r   c                f    t          d|||||||	|
||||d|}|                    ||           |S )N)r.   r0   r4   r5   r6   r7   r8   r9   r:   r<   r=   r>   )r   r   )r'   r   )r   r~   r0   r   r.   r4   r5   r6   r7   r8   r9   r:   r<   r=   r>   r   instances                    r   
from_textszLanceDB.from_textsk  sk    &  
!!!1
 
 
 
 	5I666r   
delete_alldrop_columnsc                   |                      |          }|r|                    |           dS |r=|                    d                    d                    |                               dS |r-| j        t          d          |                    |           dS |r|                    d           dS t          d          )a  
        Allows deleting rows by filtering, by ids or drop columns from the table.

        Args:
            filter: Provide a string SQL expression -  "{col} {operation} {value}".
            ids: Provide list of ids to delete from the table.
            drop_columns: Provide list of columns to drop from the table.
            delete_all: If True, delete all rows from the table.
        zid in ('{}'),Nz;Column operations currently not supported in LanceDB Cloud.truez6Provide either filter, ids, drop_columns or delete_all)rc   deleteformatr#   r8   r   r  rT   )rd   r   r  r   r  rE   r   r   s           r   r  zLanceDB.delete  s    $ nnT"" 	WJJv 	WJJ~,,SXXc]];;<<<<< 
	W|')Q     ..... 	WJJvUVVVr   )r.   r/   r0   r1   r2   r3   r4   r3   r5   r3   r6   r3   r7   r3   r8   r3   r9   r3   r:   r3   r;   r/   r<   r3   r=   r/   r>   r?   r@   rA   )F)rf   r   rg   rh   r   r   )r   r1   )NN)
r~   r   r   r   r   r   r   r   r   r   )NF)rE   r3   rG   r   r   r   )NNr   r   Nr   N)r   r3   r   r3   r   r   r   r   r   r   r   r3   rE   r3   r   r   )r2   r   r   r   )
r   r   r   r   r   r   r   r   r   r   )NNN)r   r   r    r   r   r/   rE   r3   r   r   r   r   )r   r   )r0   r   r    r   r   r   rE   r3   r   r   r   r   )
r   r   r    r   r   r   r   r   r   r   )NNNF)r   r   r    r   rE   r3   r   r/   r   r   r   r   r   r   )Nr   r   N)r   r   r    r   r   rA   r   r   r   r   r   r   r   r   )r0   r   r    r   r   rA   r   r   r   r   r   r   r   r   )NNr(   r)   r*   r+   NNr,   r-   NN)"r   r   r~   r   r0   r   r   r   r.   r/   r4   r3   r5   r3   r6   r3   r7   r3   r8   r3   r9   r3   r:   r3   r<   r3   r=   r/   r>   r?   r   r   r   r'   )NNNNN)r   r   r  r   r   r3   r  r   rE   r3   r   r   r   r   )__name__
__module____qualname____doc__	DEFAULT_Kre   rz   propertyr}   r   rc   r   r   r   r   r   r   r   r   r   r   r   classmethodr  r  r   r   r   r'   r'      s        : %)*.+$, $"($1!% $)#"&"&AE!UG UG UG UG UGn    <    X +/#'	. . . . .b IN    F #'$((+)+*. $"(F (F (F (F (FTG G G G +/#'	2 2 2 2 2n   $"$ $ $ $ $L   8  +/"K K K K K(  +/"
 
 
 
 
2  +/	&: &: &: &: &:V  " $#    @   +/* * * * *^   +/+  +  +  +  + Z 
 +/$($, $"($1!% $)"&"&AE# # # # [#N $(%) $,0"!W !W !W !W !W !W !Wr   r'   )r   r   )r   r   r   r   )
__future__r   r   rL   r   r[   typingr   r   r   r   r   r	   r
   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   r  r   r%   r'   r   r   r   <module>r     sP   " " " " " "  				   F F F F F F F F F F F F F F F F F F     - - - - - - 0 0 0 0 0 0 - - - - - - 3 3 3 3 3 3 M M M M M M	# # # #
E E E E
T
W T
W T
W T
W T
Wk T
W T
W T
W T
W T
Wr   