
    Ng                        d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ   G d de          Z!dS )    )annotationsN)contextmanager)StringIO)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTupleType)
Embeddings)VectorStore)Document)
connection)cursorc                     e Zd ZdZ G d deej                  Z G d d          ZdddddXdZ	 G d d          Z
dYdZdYdZ	 	 dZd[dZ	 d\d]dZd^d Z	 d\d_d(Zd`d,Ze	 	 	 	 	 dadbd2            Z	 	 dZdcd7Z	 ddded9Zdfd=Z	 dgdhdBZ	 dgdidEZ	 dgdjdFZ	 dgdkdGZ	 d\dldJZdmdMZdndOZdYdPZdYdQZdodTZ dodUZ!dpdVZ"dqdWZ#dS )rYellowbrickzYellowbrick as a vector database.
    Example:
        .. code-block:: python
            from langchain_community.vectorstores import Yellowbrick
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            ...
    c                      e Zd ZdZdZdZdS )Yellowbrick.IndexTypez<Enumerator for the supported Index types within Yellowbrick.nonelshN)__name__
__module____qualname____doc__NONELSH     h/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/yellowbrick.py	IndexTyper   *   s        JJr!   r#   c                  *    e Zd ZdZ	 	 dddZdddZdS )Yellowbrick.IndexParamsz/Parameters for configuring a Yellowbrick index.N
index_type!Optional['Yellowbrick.IndexType']paramsOptional[Dict[str, Any]]c                L    |t           j        j        }|| _        |pi | _        d S N)r   r#   r   r&   r(   )selfr&   r(   s      r"   __init__z Yellowbrick.IndexParams.__init__3   s+    
 !(27
(DO ,BDKKKr!   keystrdefaultr   returnc                8    | j                             ||          S r+   )r(   get)r,   r.   r0   s      r"   	get_paramz!Yellowbrick.IndexParams.get_param=   s    ;??3000r!   NN)r&   r'   r(   r)   r+   )r.   r/   r0   r   r1   r   )r   r   r   r   r-   r4   r    r!   r"   IndexParamsr%   0   sU        == =A/3	' 	' 	' 	' 	'	1 	1 	1 	1 	1 	1 	1r!   r6   NF)schemaloggerdrop	embeddingr   connection_stringr/   tabler7   Optional[str]r8   Optional[logging.Logger]r9   boolr1   Nonec                  ddl m} |                                 |r|| _        nt	          j        t                    | _        | j                            t          j                   t	          j	                    }|                    t          j
                   t	          j        d          }	|                    |	           | j                            |           t          |t                    s| j                            d           dS d| _        d| _        d| _        || _        t*                              || j                  | _        t1          j        | j        j                   || _        || _        || _        d| _        |                                  | j                                         5 }
|ra| !                    | j        | j        |
	           | !                    | j        | j        z   | j        |
	           | "                    |
           | #                    |
           | $                    |
           ddd           dS # 1 swxY w Y   dS )
zInitialize with yellowbrick client.
        Args:
            embedding: Embedding operator
            connection_string: Format 'postgres://username:password@host:port/database'
            table: Table used to store / retrieve embeddings from
        r   )extrasz)%(asctime)s - %(levelname)s - %(message)sz+embeddings input must be Embeddings object.N
_lsh_index_lsh_hyperplane_content)r<   r7   r   )%psycopg2rB   register_uuidr8   logging	getLoggerr   setLevelERRORStreamHandlerDEBUG	FormattersetFormatter
addHandler
isinstancer   errorLSH_INDEX_TABLELSH_HYPERPLANE_TABLECONTENT_TABLEr;   r   DatabaseConnectionr   atexitregisterclose_connection_schema_table
_embedding_max_embedding_len_check_database_utf8
get_cursorr9   _drop_lsh_index_tables_create_schema_create_table)r,   r:   r;   r<   r7   r8   r9   rB   handler	formatterr   s              r"   r-   zYellowbrick.__init__@   su     	$##### 		, DKK!+H55DKK  ///+--GW]+++)*UVVI  +++K""7+++)Z00 	KKLLLF$0):!",!2%889JDKXX8999#"&!!###_'')) 	'V 4		DL	PPP		+(::<!    
 ++F333'''v&&&	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	's   5BIIIc                       e Zd ZU dZded<   dZded<   ded<   d fdZddZddZe	dd            Z
e	dd            Z xZS )Yellowbrick.DatabaseConnectionNr/   _connection_stringzOptional['PgConnection']_connectionlogging.Logger_loggerr;   r8   r1    'Yellowbrick.DatabaseConnection'c                    | j         >t                                          |           | _         || j         _        || j         _        | j         S r+   )	_instancesuper__new__rg   rj   )clsr;   r8   	__class__s      r"   ro   z&Yellowbrick.DatabaseConnection.__new__   s@     }$ % 4 43D0(.%= r!   r@   c                t    | j         r.| j         j        s$| j                                          d | _         d S d S d S r+   )rh   closedclose)r,   s    r"   rY   z/Yellowbrick.DatabaseConnection.close_connection   sQ     ((8(? ( &&(((#'   ( ( ( (r!   'PgConnection'c                    dd l }| j        r| j        j        r+|                    | j                  | _        d| j        _        | j        S )Nr   F)rF   rh   rs   connectrg   
autocommit)r,   rF   s     r"   get_connectionz-Yellowbrick.DatabaseConnection.get_connection   sP    OOO# 4t'7'> 4#+#3#3D4K#L#L .3 +##r!   %Generator['PgConnection', None, None]c              #  
  K   ddl m} |                                 }	 |V  |                                 d S # |$ rE}|                                 | j                            dd           t          d          |d }~ww xY w)Nr   )DatabaseErrorz2Database error occurred, rolling back transaction.T)exc_infozDatabase transaction failed.)rF   r|   ry   commitrollbackrj   rR   RuntimeError)r,   r|   connes       r"   get_managed_connectionz5Yellowbrick.DatabaseConnection.get_managed_connection   s      ......&&((D	


  ! J J J""HSW #    ##ABBIJs   8 BA A==B!Generator['PgCursor', None, None]c              #     K   |                                  5 }|                                }	 |V  |                                 n# |                                 w xY w	 d d d            d S # 1 swxY w Y   d S r+   )r   r   rt   )r,   r   r   s      r"   r_   z)Yellowbrick.DatabaseConnection.get_cursor   s      ,,.. #$# LLLLLNNNNFLLNNNNN# # # # # # # # # # # # # # # # # #s'   A-AA-AA--A14A1)r;   r/   r8   ri   r1   rk   r1   r@   )r1   ru   )r1   rz   )r1   r   )r   r   r   rm   __annotations__rh   ro   rY   ry   r   r   r_   __classcell__)rq   s   @r"   rV   rf   ~   s         	044444	! 	! 	! 	! 	! 	!	( 	( 	( 	(
	$ 	$ 	$ 	$ 
	 	 	 
	 
	# 	# 	# 
	# 	# 	# 	# 	#r!   rV   r   
'PgCursor'c                    ddl m} | j        rV|                    |                    d                              |                    | j                                       dS dS )z>
        Helper function: create schema if not exists
        r   sqlzE
                    CREATE SCHEMA IF NOT EXISTS {s}
                )sN)rF   r   rZ   executeSQLformat
Identifier)r,   r   r   s      r"   ra   zYellowbrick._create_schema   s     	!     < 		NN  &nnT\22       		 		r!   c                   ddl m} | j        r| j        fnd} |j        g || j        | j        z   R  }|                    | j        | j        z   dz             }|                    |                    d                              ||                     | j        r| j        fnd} |j        g || j        R  } |j        g || j        | j        z   R  }|                    | j        | j        z   dz             }|                    | j        | j        z   dz             }	|                    |                    d	                              ||||	
                     dS )z=
        Helper function: create table if not exists
        r   r   r    
_pk_doc_ida0  
                CREATE TABLE IF NOT EXISTS {t} (
                doc_id UUID NOT NULL,
                text VARCHAR(60000) NOT NULL,
                metadata VARCHAR(1024) NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            tc_pk_doc_id_embedding_id
_fk_doc_ida  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                embedding_id SMALLINT NOT NULL,
                embedding FLOAT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, embedding_id),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            t1t2c1c2N)	rF   r   rZ   r   r[   rU   r   r   r   )
r,   r   r   schema_prefixr   r   r   r   r   r   s
             r"   rb   zYellowbrick._create_table   s    	!     +/<?RCNLML4;9K+KLLLNN4;);;lJKKGG	 	 f   	
 	
 	
  ,0<?RS^8]8DK888S^M]MDK$:L,LMMM^^K$,,/HH
 
 ^^DK$*<<|KLLGG
 
 f	   	
 	
 	
 	
 	
r!   Optional['PgCursor']c                    |L| j                                         5 }|                     |||           ddd           dS # 1 swxY w Y   dS |                     |||           dS )z
        Helper function: Drop data. If a cursor is provided, use it;
        otherwise, obtain a new cursor for the operation.
        N)r7   )r   r_   _drop_table)r,   r<   r7   r   s       r"   r9   zYellowbrick.drop   s     >++-- ?  v >>>? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? VU6:::::s   AA	Ac                    ddl m} |r|                    ||          }n|                    |          }|                    d                              |          }|                    |           dS )zI
        Executes the drop table command using the given cursor.
        r   r   z1
        DROP TABLE IF EXISTS {} CASCADE
        N)rF   r   r   r   r   r   )r,   r   r<   r7   r   
table_namedrop_table_querys          r"   r   zYellowbrick._drop_table  s     	!      	/66JJ..J77
 
 &

	 	
 	'(((((r!   c                H   | j                                         5 }d}|                    |           |                                d         }ddd           n# 1 swxY w Y   |                                dk    s|                                dk    rdS t          d          )zE
        Helper function: Test the database is UTF-8 encoded
        z
                SELECT pg_encoding_to_char(encoding)
                FROM pg_database
                WHERE datname = current_database();
            r   Nutf8zutf-8TzDatabase encoding is not UTF-8)r   r_   r   fetchonelower	Exception)r,   r   queryencodings       r"   r^   z Yellowbrick._check_database_utf8  s     _'')) 	,VE
 NN5!!!((+H	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, >>v%%)9)9W)D)D4<===s   2AAAtextsIterable[str]	metadatasOptional[List[dict]]kwargsr   	List[str]c           
     $   d}t          |          }| j                            t          |                    }g }|sd |D             }|                    d          pt                                          }| j                                        5 }t                      }	t                      }
t          j
        |	ddt          j                  }t          j
        |
ddt          j                  }d}t          |          D ]\  }}t          t          j                              }|                    |           |                    ||t%          j        ||                   g           t          ||                   D ]\  }}|                    |||g           |dz  }||k    rm|                     ||	|
           |	                    d           |	                    d           |
                    d           |
                    d           d}|dk    r|                     ||	|
           d d d            n# 1 swxY w Y   |j        t          j        j        k    r(|                     |t          j        |                     |S )	Ni'  c                    g | ]}i S r    r    ).0_s     r"   
<listcomp>z)Yellowbrick.add_texts.<locals>.<listcomp>?  s    ++++++r!   index_params	")	delimiter	quotecharquotingr      )listr\   embed_documentsr3   r   r6   r   r_   r   csvwriterQUOTE_MINIMAL	enumerater/   uuiduuid4appendwriterowjsondumps_copy_to_dbseektruncater&   r#   r   _update_indexUUID)r,   r   r   r   
batch_size
embeddingsresultsr   r   
content_ioembeddings_iocontent_writerembeddings_writercurrent_batch_sizeitextdoc_uuidembedding_idr:   s                      r"   	add_textszYellowbrick.add_texts3  s    
U_44T%[[AA
 	,++U+++Izz.11N[5L5L5N5N_''))  	DV!J$JJM Zdc3CT  N !$
cFW! ! ! "#$U++ + +4tz||,,x(((''4IaL9Q9Q(RSSS/8A/G/G T T+L)%..,	/RSSSS"a'"%33$$VZGGGOOA&&&''***!&&q)))!**1---)*&!A%%  ]CCCA 	D  	D  	D  	D  	D  	D  	D  	D  	D  	D  	D  	D  	D  	D  	DD "k&;&???|TYx-@-@AAAs   F'II
Ir   r   r   c                   |                     d           |                     d           ddlm} | j        r| j        fnd} |j        g || j        | j        z   R  }|                    d                              |          }|	                    ||           | j        r| j        fnd} |j        g || j        R  }|                    d                              |          }|	                    ||           d S )Nr   r   r    z
            COPY {table} (doc_id, text, metadata) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )r<   z
            COPY {table} (doc_id, embedding_id, embedding) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )
r   rF   r   rZ   r   r[   rU   r   r   copy_expert)	r,   r   r   r   r   r   r<   content_copy_queryembeddings_copy_querys	            r"   r   zYellowbrick._copy_to_dbj  s8    	1      +/<?RPPt{T=O/OPPP WW
 

 &u&

 	 	-z:::+/<?R;;t{;;; #!
 !

 &u&

 	 	0-@@@@@r!    	langchainpublicrp   Type[Yellowbrick]c                F     | |||||          }	 |	j         d||d| |	S )a  Add texts to the vectorstore index.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            connection_string: URI to Yellowbrick instance
            embedding: Embedding function
            table: table to store embeddings
            kwargs: vectorstore specific parameters
        )r:   r;   r<   r7   r9   )r   r   r    )r   )
rp   r   r:   r   r;   r<   r7   r9   r   vsss
             r"   
from_textszYellowbrick.from_texts  sO    * c/
 
 
 	AEYAA&AAA
r!   idsOptional[List[str]]
delete_allOptional[bool]c                   ddl m |r                    d          }n~|mt          fd|D                       }                    d                              |          }                    d                              |          }nt          d	          | j        r| j        fnd
}| j        	                                5 } j
        g || j        | j        z   R  }	                    d                              |	|          }
|                    |
            j
        g || j        R  }	                    d                              |	|          }
|                    |
            | j        || j        | j        z   g|R  r[ j
        g || j        | j        z   R  }	                    d                              |	|          }
|                    |
           ddd           n# 1 swxY w Y   dS )zxDelete vectors by uuids.

        Args:
            ids: List of ids to delete, where each id is a uuid string.
        r   r   z'
                WHERE 1=1
            Nc              3  B   K   | ]}                     |          V  d S r+   )Literal)r   idr   s     r"   	<genexpr>z%Yellowbrick.delete.<locals>.<genexpr>  s-      88b#++b//888888r!   z, z5
                WHERE doc_id IN ({ids})
            )r   z*Either ids or delete_all must be provided.r    zDELETE FROM {table} {where_sql})r<   	where_sql)rF   r   r   tuplejoinr   
ValueErrorrZ   r   r_   r   r[   rU   r   _table_existsrS   )r,   r   r   r   r   uuidsids_formattedr   r   table_identifierr   r   s              @r"   deletezYellowbrick.delete  s    	!      	K II
 _8888C88888EGGDMM..u55M  f!   	 I IJJJ+/<?R_'')) 	&V-s~    $d.@ @      GG=>>EE&) F  E NN5!!!-s~J}JdkJJJGG=>>EE&) F  E NN5!!!!t!d&::=J   	& $23> $"$$(K$2F$F$ $ $   ABBII*i J   u%%%1	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&4 ts   D&G::G>G>r   c                   ddl m} |                    |          }|                    |          }|                    |                    d                              ||                     |                                d         dk    S )z>
        Checks if a table exists in the given schema
        r   r   z
                SELECT COUNT(*)
                FROM sys.table t INNER JOIN sys.schema s ON t.schema_id = s.schema_id
                WHERE s.name = {schema} AND t.name = {table_name}
            )r7   r   )rF   r   r   r   r   r   r   )r,   r   r   r7   r   s        r"   r   zYellowbrick._table_exists  s     	!     V$$[[,,
GG  f%   	
 	
 	
   #a''r!   vectorList[float]	uuid.UUIDc                   dd l }d                    t          t          |                    }|                    |                                          }|                                }t          j        |d d                   }|S )Nr   ,   )bytes)	hashlibr   mapr/   sha1encodedigestr   r   )r,   r   r  
vector_strhash_objecthash_digestvector_uuids          r"   _generate_vector_uuidz!Yellowbrick._generate_vector_uuid  su    XXc#v..//
ll:#4#4#6#677!((**ik#2#&6777r!      kintList[Tuple[Document, float]]c                   ddl m} ddlm} |                    d          pt
                                          }| j                                        5 }d| j	        z   }| 
                    |          |                    d                              |                    |                    }	|                    |	           fdt          |          D             }
|                    d                              |                    |                    } ||||
           |                    |          }| j        r| j        fnd	} |j        g || j	        R  } |j        g || j	        | j        z   R  }|j        t
          j        j        k    r| j	        d
z   }|                     |||           | j        r| j        fnd	} |j        g || j	        | j        z   R  }|                    |          }|                    d                              ||||||                    |                    dd                              }|                    ||f           |                                }nV|                    d                              |||          }|                    ||f           |                                }ddd           n# 1 swxY w Y   g }|D ]R}t5          j        |d                   pi }t9          |d         |          }|                    ||d         f           S|S )a  Perform a similarity search with Yellowbrick with vector

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document, float]: List of Documents and scores
        r   r   )execute_valuesr   tmp_z 
                CREATE TEMPORARY TABLE {} (
                doc_id UUID,
                embedding_id SMALLINT,
                embedding FLOAT)
                ON COMMIT DROP
                DISTRIBUTE REPLICATE
            c                :    g | ]\  }}t                    ||fS r    )r/   )r   r   embedding_value
tmp_doc_ids      r"   r   zFYellowbrick.similarity_search_with_score_by_vector.<locals>.<listcomp>%  s:       1L/ Z,@  r!   z:INSERT INTO {} (doc_id, embedding_id, embedding) VALUES %sr    	_tmp_hasha/  
                    WITH index_docs AS (
                    SELECT
                        t1.doc_id,
                        SUM(ABS(t1.hash-t2.hash)) as hamming_distance
                    FROM
                        {lsh_index} t1
                    INNER JOIN
                        {input_hash_table} t2
                    ON t1.hash_index = t2.hash_index
                    GROUP BY t1.doc_id
                    HAVING hamming_distance <= {hamming_distance}
                    )
                    SELECT
                        text,
                        metadata,
                       SUM(v1.embedding * v2.embedding) /
                        (SQRT(SUM(v1.embedding * v1.embedding)) *
                       SQRT(SUM(v2.embedding * v2.embedding))) AS score
                    FROM
                        {v1} v1
                    INNER JOIN
                        {v2} v2
                    ON v1.embedding_id = v2.embedding_id
                    INNER JOIN
                        {v3} v3
                    ON v2.doc_id = v3.doc_id
                    INNER JOIN
                        index_docs v4
                    ON v2.doc_id = v4.doc_id
                    GROUP BY v3.doc_id, v3.text, v3.metadata
                    ORDER BY score DESC
                    LIMIT %s
                hamming_distance)v1v2v3	lsh_indexinput_hash_tabler  a  
                    SELECT 
                        text,
                        metadata,
                        score
                    FROM
                        (SELECT
                            v2.doc_id doc_id,
                            SUM(v1.embedding * v2.embedding) /
                            (SQRT(SUM(v1.embedding * v1.embedding)) *
                            SQRT(SUM(v2.embedding * v2.embedding))) AS score
                        FROM
                            {v1} v1
                        INNER JOIN
                            {v2} v2
                        ON v1.embedding_id = v2.embedding_id
                        GROUP BY v2.doc_id
                        ORDER BY score DESC LIMIT %s
                        ) v4
                    INNER JOIN
                        {v3} v3
                    ON v4.doc_id = v3.doc_id
                    ORDER BY score DESC
                )r  r  r  Nr   )page_contentmetadata   )rF   r   psycopg2.extrasr  r3   r   r6   r   r_   r[   r
  r   r   r   r   r   rZ   rU   r&   r#   r   _generate_tmp_lsh_hashesrS   r   r4   fetchallr   loadsr   r   )r,   r:   r  r   r   r  r   r   tmp_embeddings_tablecreate_table_query
data_inputinsert_queryr  r   r  r  tmp_hash_tabler  r  	sql_queryr   	documentsresultr  docr  s                            @r"   &similarity_search_with_score_by_vectorz2Yellowbrick.similarity_search_with_score_by_vector  s    	!     222222zz.11N[5L5L5N5N_'')) z	,V#)DK#7 33I>>J!$	" 	" fS^^$899::  NN-...   5>y5I5I  J 77L fS^^$899::  N6<<<< 455B/3|CT\OOM<<<<<BQQd>P0PQQQB&+*?*CCC!%{!:--("   48< GR*CN "$(K$2F$F  	 $'>>.#A#A GG!# #F &'%5%([[$../A1EE& &  	 	G Z D   !//++GG 2 &   3 < y1$/// //++uz	, z	, z	, z	, z	, z	, z	, z	, z	, z	, z	, z	, z	, z	, z	,x 35	 	/ 	/Fz&),,2Hq	HEEECc6!9-....s   I=KK"%K"r   List[Document]c                n    | j                             |          } | j        d||d|}d |D             S )ae  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of Documents
        r:   r  c                    g | ]\  }}|S r    r    r   r+  r   s      r"   r   z1Yellowbrick.similarity_search.<locals>.<listcomp>      ,,,Q,,,r!   r    r\   embed_queryr,  r,   r   r  r   r:   r)  s         r"   similarity_searchzYellowbrick.similarity_search  sZ     O//66	?D? 
1
 
(.
 
	 -,),,,,r!   c                Z    | j                             |          } | j        d||d|}|S )ar  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of (Document, similarity)
        r/  r    r3  r5  s         r"   similarity_search_with_scorez(Yellowbrick.similarity_search_with_score  sL     O//66	?D? 
1
 
(.
 
	 r!   c                :     | j         d||d|}d |D             S )a  Perform a similarity search with Yellowbrick by vectors

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of documents
        r/  c                    g | ]\  }}|S r    r    r1  s      r"   r   z;Yellowbrick.similarity_search_by_vector.<locals>.<listcomp>  r2  r!   r    )r,  )r,   r:   r  r   r)  s        r"   similarity_search_by_vectorz'Yellowbrick.similarity_search_by_vector  sF     @D? 
1
 
(.
 
	 -,),,,,r!   doc_idOptional[uuid.UUID]c                   ddl m} | j        r| j        fnd} |j        g || j        | j        z   R  } |j        g || j        | j        z   R  } |j        g || j        R  }|                    d                              |          }|rI|                    d                              |	                    t          |                              n|                    d          }	|                    d          }
|                    d	                              ||||	|

          }|                    |           dS )zAdd hashes to LSH indexr   r   r    zINSERT INTO {}zWHERE e.doc_id = {doc_id})r<  r   zGROUP BY 1, 2av  
            {query_prefix}
            SELECT
                e.doc_id as doc_id,
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {condition}
            {group_by}
        )query_prefixembedding_tablehyperplanes	conditiongroup_byN)rF   r   rZ   r   r[   rT   rS   r   r   r   r/   r   )r,   r   r<  r   r   lsh_hyperplane_tablelsh_index_table_idembedding_table_idquery_prefix_idrB  rC  input_querys               r"   _update_lsh_hasheszYellowbrick._update_lsh_hashes  s    	!     +/<?R-s~  
 
 K$*CC 
  
  
 ,S^ 

 K$*>>
 
 
 ,S^H]HDKHHH''"233::;MNN CGG/0077s{{3v;;?W?W7XXX 	
 77?++gg

 
 &(.,  
 
 	& 	{#####r!   tmp_embedding_tabler'  c                   ddl m} | j        r| j        fnd} |j        g || j        | j        z   R  }|                    |          }|                    |          }|                    d                              |          }	|                    d          }
|                    d                              |	|||
          }|                    |           dS )	zGenerate temp LSHr   r   r    z+CREATE TEMPORARY TABLE {} ON COMMIT DROP ASz
GROUP BY 1a[  
            {query_prefix}
            SELECT
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {group_by}
            DISTRIBUTE REPLICATE
        )r?  r@  rA  rC  N)	rF   r   rZ   r   r[   rT   r   r   r   )r,   r   rJ  r'  r   r   rD  tmp_embedding_table_idtmp_hash_table_idr?  rC  rH  s               r"   r   z$Yellowbrick._generate_tmp_lsh_hashes  s    	!     +/<?R-s~  
 
 K$*CC 
  
  
 "%0C!D!DNN>::wwLMMTT
 
 77<((gg	
 
 &%2,	  
 
 	" 	{#####r!   num_hyperplanesc                   ddl m} | j        r| j        fnd} |j        g || j        | j        z   R  }|                    |                    d                              |                     |	                                d         dk    rdS  |j        g || j        R  }|                    |                    d                              |                     |	                                d         }|dz  }|                    d	                              |
                    |          |
                    |          |
          }|                    |           dS )z4Generate random hyperplanes and store in Yellowbrickr   r   r    zSELECT COUNT(*) FROM {t})r   Nz!SELECT MAX(embedding_id) FROM {t}r   a2  
            WITH parameters AS (
                SELECT {num_hyperplanes} AS num_hyperplanes,
                    {dims_per_hyperplane} AS dims_per_hyperplane
            )
            INSERT INTO {hyperplanes_table} (id, hyperplane_id, hyperplane)
                SELECT id, hyperplane_id, (random() * 2 - 1) AS hyperplane
                FROM
                (SELECT range-1 id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT num_hyperplanes FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) a,
                (SELECT range-1 hyperplane_id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT dims_per_hyperplane FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) b
        )rN  dims_per_hyperplanehyperplanes_table)rF   r   rZ   r   r[   rT   r   r   r   r   r   )	r,   r   rN  r   r   rQ  r   num_dimensionsr&  s	            r"   _populate_hyperplanesz!Yellowbrick._populate_hyperplanes*  s         +/<?R*CN 

 K$*CC
 
 
 	sww9::AADUAVVWWW??Q!##FCN7M74;777swwBCCJJQJOOPPP**1-!ww
 
$ &KK88 #N ; ;/  
 
% 	. 	|$$$$$r!   c                   ddl m} | j        r| j        fnd} |j        g || j        | j        z   R  } |j        g || j        | j        z   R  }|                    | j        | j        z   dz             }|                    | j        | j        z   dz             }|                    |                    d          	                    ||||                     | j        r| j        fnd} |j        g || j        | j
        z   R  }|                    | j        | j
        z   dz             }	|                    |                    d	          	                    ||	
                     dS )z&Create LSH index and hyperplane tablesr   r   r    r   r   a  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                hash_index SMALLINT NOT NULL,
                hash SMALLINT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, hash_index),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            r   _pk_id_hp_ida2  
                CREATE TABLE IF NOT EXISTS {t} (
                id SMALLINT NOT NULL,
                hyperplane_id SMALLINT NOT NULL,
                hyperplane FLOAT NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (id, hyperplane_id))
                DISTRIBUTE REPLICATE SORT ON (id)
            r   N)rF   r   rZ   r   r[   rS   rU   r   r   r   rT   )
r,   r   r   r   r   r   r   r   r   r   s
             r"   _create_lsh_index_tablesz$Yellowbrick._create_lsh_index_tablesT  s         +/<?RS^O]ODK$:N,NOOOS^M]MDK$:L,LMMM^^DK$*>>MNN^^DK$*>>MNNGG
 
 f	   	
 	
 	
& ,0<?RCNSMS4;9R+RSSSNN4;)BB^STTGG	 	 f   	
 	
 	
 	
 	
r!   c                    |                      | j        | j        | j        z   |           |                      | j        | j        | j        z   |           dS )zDrop LSH index tables)r7   r<   r   N)r9   rZ   r[   rS   rT   )r,   r   s     r"   r`   z"Yellowbrick._drop_lsh_index_tables  sm    		<t{T5I'IRX 	 	
 	
 	
 			<+ 99 	 	
 	
 	
 	
 	
r!   r   r%   c                t   |j         t          j        j        k    r| j                                        5 }|                     |           |                     |           |                     ||	                    dd                     | 
                    |           ddd           dS # 1 swxY w Y   dS dS )z"Create index from existing vectorsrN     N)r&   r   r#   r   r   r_   r`   rV  rS  r4   rI  r,   r   r   s      r"   create_indexzYellowbrick.create_index  s    "k&;&???++-- 0++F333--f555**L223DcJJ   ''///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @?s   A*B++B/2B/c                    |j         t          j        j        k    rI| j                                        5 }|                     |           ddd           dS # 1 swxY w Y   dS dS )zDrop an indexN)r&   r   r#   r   r   r_   r`   rZ  s      r"   
drop_indexzYellowbrick.drop_index  s    "k&;&???++-- 4++F3334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 @?s   AAAc                    |j         t          j        j        k    rJ| j                                        5 }|                     ||           ddd           dS # 1 swxY w Y   dS dS )zHUpdate an index with a new or modified embedding in the embeddings tableN)r&   r   r#   r   r   r_   rI  )r,   r   r<  r   s       r"   r   zYellowbrick._update_index  s     "k&;&???++-- 8''7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 @?s   AAAc                   ddl m} 	 | j                                        5 }| j        r| j        fnd} |j        g || j        R  } |j        g || j        dz   R  } |j        g || j        | j        z   R  }|                    d          	                    ||          }|
                    |           |                     |           |                    d          	                    ||          }|
                    |           |                    d          	                    ||          }	|
                    |	           d d d            d S # 1 swxY w Y   d S # t          $ r}
t          d	|
           |
d }
~
ww xY w)
Nr   r   r    _v1zALTER TABLE {t1} RENAME TO {t2})r   r   z
                    INSERT INTO {t1} (doc_id, embedding_id, embedding) 
                    SELECT id, embedding_id, embedding FROM {t2}
                z
                    INSERT INTO {t1} (doc_id, text, metadata) 
                    SELECT DISTINCT id, text, metadata FROM {t2}
                zFailed to migrate schema: )rF   r   r   r_   rZ   r   r[   rU   r   r   r   rb   r   r   )r,   r   r   r   r   old_embeddingscontentalter_table_queryr&  insert_content_queryr   s              r"   migrate_schema_v1_to_v2z#Yellowbrick.migrate_schema_v1_to_v2  s>         #	H++--  537< GR+S^H]HDKHHH
!/!T!Te@S!T!T!T(#. "$(K$2D$D   %(GG,M$N$N$U$U!% %V % %! 0111""6***"ww   
 &!%     |,,,'*ww( (
 &G&77 % 3444A 5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5B  	H 	H 	H?A??@@aG	Hs;   E& D+EE& EE&  E!E& &
F0FF)r:   r   r;   r/   r<   r/   r7   r=   r8   r>   r9   r?   r1   r@   )r   r   r1   r@   r5   )r<   r/   r7   r=   r   r   r1   r@   r+   )r   r   r<   r/   r7   r=   r1   r@   )r1   r?   )r   r   r   r   r   r   r1   r   )r   r   r   r   r   r   r1   r@   )Nr   r   r   F)rp   r   r   r   r:   r   r   r   r;   r/   r<   r/   r7   r/   r9   r?   r   r   r1   r   )r   r   r   r   r   r   r1   r@   )r   )r   r   r   r/   r7   r/   r1   r?   )r   r   r1   r   )r  )r:   r   r  r  r   r   r1   r  )r   r/   r  r  r   r   r1   r-  )r   r/   r  r  r   r   r1   r  )r:   r   r  r  r   r   r1   r-  )r   r   r<  r=  r1   r@   )r   r   rJ  r/   r'  r/   r1   r@   )r   r   rN  r  r1   r@   )r   r%   r1   r@   )r   r%   r<  r   r1   r@   r   )$r   r   r   r   r/   enumEnumr#   r6   r-   rV   ra   rb   r9   r   r^   r   r   classmethodr   r   r   r
  r,  r6  r8  r;  rI  r   rS  rV  r`   r[  r]  r   re  r    r!   r"   r   r   !   sM            C   1 1 1 1 1 1 1 1, !%+/<' <' <' <' <' <'|4# 4# 4# 4# 4# 4# 4# 4#l   "1
 1
 1
 1
l !%'+	; ; ; ; ;( !%	) ) ) ) ).   . +/5 5 5 5 5nA A A A8 
 +/!#     [@ $(%); ; ; ; ;| BJ( ( ( ( (0    01V V V V Vr $%- - - - -, $%    , 01- - - - -. '++$ +$ +$ +$ +$Z"$ "$ "$ "$H(% (% (% (%T-
 -
 -
 -
^	
 	
 	
 	
	0 	0 	0 	04 4 4 48 8 8 8&H &H &H &H &H &Hr!   r   )"
__future__r   rW   r   rf  r   rH   r   
contextlibr   ior   typingr   r   r   r	   r
   r   r   r   r   langchain_core.embeddingsr   langchain_core.vectorstoresr   %langchain_community.docstore.documentr   psycopg2.extensionsr   PgConnectionr   PgCursorr   r    r!   r"   <module>rs     s   " " " " " "  



     % % % % % %      
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 1 0 0 0 0 0 3 3 3 3 3 3 : : : : : : 7>>>>>>666666lH lH lH lH lH+ lH lH lH lH lHr!   