
    NgK,                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZ  ej        e          ZdZdZd	Zd
ZdZ G d de          ZdS )    )annotationsN)AnyIterableListOptionalType)Document)
Embeddings)VSTVectorStore	embeddingidtext
embeddingssimilarity_scorec                  |    e Zd ZdZdeeeedd$dZe	d%d            Z
	 d&d'dZ	 d(d)dZe	 d&d*d!            Zd+d#ZdS ),DuckDBa;
  `DuckDB` vector store.

    This class provides a vector store interface for adding texts and performing
    similarity searches using DuckDB.

    For more information about DuckDB, see: https://duckdb.org/

    This integration requires the `duckdb` Python package.
    You can install it with `pip install duckdb`.

    *Security Notice*: The default DuckDB configuration is not secure.

        By **default**, DuckDB can interact with files across the entire file system,
        which includes abilities to read, write, and list files and directories.
        It can also access some python variables present in the global namespace.

        When using this DuckDB vectorstore, we suggest that you initialize the
        DuckDB connection with a secure configuration.

        For example, you can set `enable_external_access` to `false` in the connection
        configuration to disable external access to the DuckDB connection.

        You can view the DuckDB configuration options here:

        https://duckdb.org/docs/configuration/overview.html

        Please review other relevant security considerations in the DuckDB
        documentation. (e.g., "autoinstall_known_extensions": "false",
        "autoload_known_extensions": "false")

        See https://python.langchain.com/docs/security for more information.

    Args:
        connection: Optional DuckDB connection
        embedding: The embedding function or model to use for generating embeddings.
        vector_key: The column name for storing vectors. Defaults to `embedding`.
        id_key: The column name for storing unique identifiers. Defaults to `id`.
        text_key: The column name for storing text. Defaults to `text`.
        table_name: The name of the table to use for storing embeddings. Defaults to
          `embeddings`.

    Example:
        .. code-block:: python

            import duckdb
            conn = duckdb.connect(database=':memory:',
                config={
                    # Sample configuration to restrict some DuckDB capabilities
                    # List is not exhaustive. Please review DuckDB documentation.
                        "enable_external_access": "false",
                        "autoinstall_known_extensions": "false",
                        "autoload_known_extensions": "false"
                    }
            )
            embedding_function = ... # Define or import your embedding function here
            vector_store = DuckDB(conn, embedding_function)
            vector_store.add_texts(['text1', 'text2'])
            result = vector_store.similarity_search('text1')
    N)
connection
vector_keyid_keytext_key
table_namer   Optional[Any]r   r
   r   strr   r   r   c                  	 ddl }n# t          $ r t          d          w xY w|| _         || _        || _        || _        || _        || _        | j        t          d          |t          j	        d           |p| j         
                    dddi	          | _        |                                  | j                            | j                  | _        dS )
z?Initialize with DuckDB connection and setup for vector storage.r   NzMCould not import duckdb package. Please install it with `pip install duckdb`.z0An embedding function or model must be provided.a?  No DuckDB connection provided. A new connection will be created.This connection is running in memory and no data will be persisted.To persist data, specify `connection=duckdb.connect(...)` when using the API. Please review the documentation of the vectorstore for security recommendations on configuring the connection.z:memory:enable_external_accessfalse)databaseconfig)duckdbImportError
_embedding_vector_key_id_key	_text_key_table_name
ValueErrorwarningswarnconnect_connection_ensure_tabletable_table)selfr   r   r   r   r   r   r    s           c/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/duckdb.py__init__zDuckDB.__init__T   s   	MMMM 	 	 	?  	
 #%!%?"OPPPMJ   & 
)<)<)A7(K *= *
 *
 	&,,T-=>>s    !returnOptional[Embeddings]c                    | j         S )z6Returns the embedding object used by the vector store.)r"   )r/   s    r0   r   zDuckDB.embeddings   s         textsIterable[str]	metadatasOptional[List[dict]]kwargsr   	List[str]c                2   d}	 ddl }d}n*# t          $ r t                              d           Y nw xY w|                    dd |D                       }|pd |D             }| j                            t          |                    }g }t          |          D ]\  }	}
||	         }|r-|	t          |          k     rt          j        ||	                   nd}|r3|                    | j        ||	         | j        |
| j        |d	|i           s| j                            d
| j         d||	         |
||g           |rX|j                            |          }| j                            d|           | j                            d
| j         d           |S )a  Turn texts into embedding and add it to the database using Pandas DataFrame

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            kwargs: Additional parameters including optional 'ids' to associate
              with the texts.

        Returns:
            List of ids of the added texts.
        Fr   NTzgUnable to import pandas. Install it with `pip install -U pandas` to improve performance of add_texts().idsc                N    g | ]"}t          t          j                              #S  r   uuiduuid4.0_s     r0   
<listcomp>z$DuckDB.add_texts.<locals>.<listcomp>   s&     B B BqTZ\\!2!2 B B Br5   c                N    g | ]"}t          t          j                              #S r?   r@   rC   s     r0   rF   z$DuckDB.add_texts.<locals>.<listcomp>   s&    777Ac$*,,''777r5   metadatazINSERT INTO z VALUES (?,?,?,?)dfz SELECT * FROM df)pandasr!   loggerinfopopr"   embed_documentslist	enumeratelenjsondumpsappendr$   r%   r#   r+   executer&   	DataFrame	from_dictregister)r/   r6   r8   r:   have_pandaspdr=   r   dataidxr   r   rH   rI   s                 r0   	add_textszDuckDB.add_texts   s   " 		KK 	 	 	KK9    	 jj B BE B B BCC 777777_44T%[[AA
"5)) 	 	IC"3I !$s9~~!5!5 
9S>*** 
  c#h()"H	     ((F4#3FFFXtY9   
  	''--B%%dB///$$Bt/BBB   
s    $22   querykintList[Document]c                2     j                             |          } j                            d j                             j                   j                            |                    }  j        j         j        	                    g           |
                    t                    g                     t           d                              |                                           fdt          t!                              D             S )a  Performs a similarity search for a given query string.

        Args:
            query: The query string to search for.
            k: The number of similar texts to return.

        Returns:
            A list of Documents most similar to the query.
        list_cosine_similarity)excludez descc           
         g | ]q}t          j                 |         d          |         r?i t          j        d          |                   dt           t                   |         ini           rS )rH   rE   )page_contentrH   )r	   r%   rR   loadsSIMILARITY_ALIAS)rD   r\   docsr/   s     r0   rF   z,DuckDB.similarity_search.<locals>.<listcomp>   s     
 
 
  !$.1#6 
#C( jj!1#!677 +(**D1A,B3,G   	 	 	
 
 
r5   )r"   embed_queryr    FunctionExpressionColumnExpressionr#   ConstantExpressionr.   selectStarExpressionaliasri   orderlimitfetchdfrangerQ   )r/   r_   r`   r:   r   rd   rj   s   `     @r0   similarity_searchzDuckDB.similarity_search   s    O//66	!%!?!?$K(()9::K**955"
 "
 DKK..r.::*001ABB U&---..U1XXWYY 	
 
 
 
 
 SYY''
 
 
 	
r5   cls	Type[VST]c                T   |                     dd          }|                     dt                    }|                     dt                    }|                     dt                    }|                     dt                    }	t          ||||||	          }
 |
j        |fd|i| |
S )	a  Creates an instance of DuckDB and populates it with texts and
          their embeddings.

        Args:
            texts: List of strings to add to the vector store.
            embedding: The embedding function or model to use for generating embeddings.
            metadatas: Optional list of metadata dictionaries associated with the texts.
            kwargs: Additional keyword arguments including:
                - connection: DuckDB connection. If not provided, a new connection will
                  be created.
                - vector_key: The column name for storing vectors. Default "vector".
                - id_key: The column name for storing unique identifiers. Default "id".
                - text_key: The column name for storing text. Defaults to "text".
                - table_name: The name of the table to use for storing embeddings.
                    Defaults to "embeddings".

        Returns:
            An instance of DuckDB with the provided texts and their embeddings added.
        r   Nr   r   r   r   )r   r   r   r   r   r   r8   )getDEFAULT_VECTOR_KEYDEFAULT_ID_KEYDEFAULT_TEXT_KEYDEFAULT_TABLE_NAMEr   r]   )rw   r6   r   r8   r:   r   r   r   r   r   instances              r0   
from_textszDuckDB.from_texts   s    : ZZd33
ZZ.@AA
Hn55::j*:;;ZZ.@AA
 !!!
 
 
 	5@@I@@@@r5   Nonec           	         d| j          d| j         d| j         d| j         d	}| j                            |           dS )z0Ensures the table for storing embeddings exists.z$
        CREATE TABLE IF NOT EXISTS z (
            z" VARCHAR PRIMARY KEY,
            z VARCHAR,
            z9 FLOAT[],
            metadata VARCHAR
        )
        N)r&   r$   r%   r#   r+   rU   )r/   create_table_sqls     r0   r,   zDuckDB._ensure_table$  sq    $($4 \  ^  	   	  !122222r5   )r   r   r   r
   r   r   r   r   r   r   r   r   )r2   r3   )N)r6   r7   r8   r9   r:   r   r2   r;   )r^   )r_   r   r`   ra   r:   r   r2   rb   )rw   rx   r6   r;   r   r
   r8   r9   r:   r   r2   r   )r2   r   )__name__
__module____qualname____doc__r{   r|   r}   r~   r1   propertyr   r]   rv   classmethodr   r,   r?   r5   r0   r   r      s        : :~ %),$(,)? )? )? )? )? )?V    X +/B B B B BJ $%)
 )
 )
 )
 )
V 
 +/	. . . . [.`
3 
3 
3 
3 
3 
3r5   r   )
__future__r   rR   loggingrA   r(   typingr   r   r   r   r   langchain_core.documentsr	   langchain_core.embeddingsr
   langchain_core.vectorstoresr   r   	getLoggerr   rK   r{   r|   r}   r~   ri   r   r?   r5   r0   <module>r      s   " " " " " "     6 6 6 6 6 6 6 6 6 6 6 6 6 6 - - - - - - 0 0 0 0 0 0 8 8 8 8 8 8 8 8		8	$	$   ! % W3 W3 W3 W3 W3[ W3 W3 W3 W3 W3r5   