
    NgY                       d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ  ej                    ZddZ G d de          Z G d de          Z G d de          ZdS )    )annotationsN)sha1)Thread)AnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDictsstrargsr   returnboolc                    |D ]	}|| vr dS 
dS )z
    Check if a string contains multiple substrings.
    Args:
        s: string to check.
        *args: substrings to check.

    Returns:
        True if all substrings are in the string, False otherwise.
    FT )r   r   as      d/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/vectorstores/myscale.pyhas_mul_sub_strr      s-       A::55 4    c                      e Zd ZU dZdZded<   dZded<   dZd	ed
<   dZd	ed<   dZ	ded<   dZ
ded<   dddddZded<   dZded<   dZded<   dZded<   d&d Z ed!d"d#d$%          ZdS )'MyScaleSettingsa  MyScale client configuration.

    Attribute:
        myscale_host (str) : An URL to connect to MyScale backend.
                             Defaults to 'localhost'.
        myscale_port (int) : URL port to connect with HTTP. Defaults to 8443.
        username (str) : Username to login. Defaults to None.
        password (str) : Password to login. Defaults to None.
        index_type (str): index type string.
        index_param (dict): index build parameter.
        database (str) : Database name to find the table. Defaults to 'default'.
        table (str) : Table name to operate on.
                      Defaults to 'vector_table'.
        metric (str) : Metric to compute distance,
                       supported are ('L2', 'Cosine', 'IP'). Defaults to 'Cosine'.
        column_map (Dict) : Column type map to project column name onto langchain
                            semantics. Must have keys: `text`, `id`, `vector`,
                            must be same size to number of columns. For example:
                            .. code-block:: python

                                {
                                    'id': 'text_id',
                                    'vector': 'text_embedding',
                                    'text': 'text_plain',
                                    'metadata': 'metadata_dictionary_in_json',
                                }

                            Defaults to identity map.

    	localhostr   hosti   intportNOptional[str]usernamepasswordMSTG
index_typezOptional[Dict[str, str]]index_paramidtextvectormetadata)r(   r)   r*   r+   zDict[str, str]
column_mapdefaultdatabase	langchaintableCosinemetricitemr   r   c                "    t          | |          S N)getattr)selfr3   s     r   __getitem__zMyScaleSettings.__getitem__U   s    tT"""r   z.envutf-8myscale_ignore)env_fileenv_file_encoding
env_prefixextra)r3   r   r   r   )__name__
__module____qualname____doc__r   __annotations__r!   r#   r$   r&   r'   r,   r.   r0   r2   r8   r   model_configr   r   r   r   r   !   s         > DD"H"""""H""""J,0K0000 	" "J     HEF# # # # &%!	  LLLr   r   c                       e Zd ZdZ	 d7d8 fdZed9d            Zd:dZd;dZd<dZ		 	 	 d=d>dZ
e	 	 	 	 d?d@d"            ZdAd#Z	 d7dBd)Z	 dCdDd.Z	 	 dCdEd/Z	 dCdFd1ZdGd2Z	 	 dHdId5ZedAd6            Z xZS )JMyScalea  `MyScale` vector store.

    You need a `clickhouse-connect` python package, and a valid account
    to connect to MyScale.

    MyScale can not only search with simple vector indexes.
    It also supports a complex query with multiple conditions,
    constraints and even sub-queries.

    For more information, please visit
        [myscale official site](https://docs.myscale.com/en/overview/)
    N	embeddingr   configOptional[MyScaleSettings]kwargsr   r   Nonec                   	 ddl m} n# t          $ r t          d          w xY w	 ddlm} || _        n# t          $ r d | _        Y nw xY wt                                                       ||| _        nt                      | _        | j        sJ | j        j	        r| j        j
        sJ | j        j        r$| j        j        r| j        j        r| j        j        sJ dD ]}|| j        j        v sJ | j        j                                        dv sJ | j        j        d	v rt                               d
           t%          |                    d                    }| j        j        r>dd                    d | j        j                                        D                       z   nd}d| j        j         d| j        j         d| j        j        d          d| j        j        d          d| j        j        d          d| j        j        d          d| j        j        d          d| d| j        j        d          d| j        j         d| j        j         d| d| j        j        d          d }	|| _        d!| _        d"| _        || _        | j        j                                        d#v rd$nd%| _         |d+| j        j	        | j        j
        | j        j        | j        j        d&|| _        	 | j                             d'           n?# tB          $ r2}
t           "                    d(| j        j#         d)           Y d}
~
nd}
~
ww xY w| j                             d*           | j                             |	           dS ),zMyScale Wrapper to LangChain

        embedding (Embeddings):
        config (MyScaleSettings): Configuration to MyScale Client
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        r   )
get_clientzlCould not import clickhouse connect python package. Please install it with `pip install clickhouse-connect`.)tqdmc                    | S r5   r   )xs    r   <lambda>z"MyScale.__init__.<locals>.<lambda>   s    1 r   N)r(   r*   r)   r+   )IPCOSINEL2)ipcosinel2z_Lower case metric types will be deprecated the future. Please use one of ('IP', 'Cosine', 'L2')ztry this out, ,c                &    g | ]\  }}d | d| d S )'=r   ).0kvs      r   
<listcomp>z$MyScale.__init__.<locals>.<listcomp>   s*    VVVda\\\Q\\\VVVr    z(
            CREATE TABLE IF NOT EXISTS .z(
                r(   z String,
                r)   r*   z! Array(Float32),
                r+   zP JSON,
                CONSTRAINT cons_vec_len CHECK length(                    z) = z$,
                VECTOR INDEX vidx z                     TYPE z&(                        'metric_type=r\   z,)
            ) ENGINE = MergeTree ORDER BY z	
        \)rd   r\   )rT   rU   ASCDESC)r   r!   r#   r$   z"SET allow_experimental_json_type=1zClickhouse version=z6 - There is no allow_experimental_json_type parameter.z$SET allow_experimental_object_type=1r   )$clickhouse_connectrN   ImportErrorrO   pgbarsuper__init__rI   r   r   r!   r,   r.   r0   r2   upperloggerwarninglenembed_queryr'   joinitemsr&   dimBSmust_escape_embeddings
dist_orderr#   r$   clientcommand	Exceptiondebugserver_version)r7   rH   rI   rK   rN   rO   r_   rs   index_paramsschema__	__class__s              r   rk   zMyScale.__init__n   s   	5555555 	 	 	K  	
	%!!!!!!DJJ 	% 	% 	%$DJJJ	% 	 DKK)++DK{{4DK$4444K"	
$	
 !	
 "		
 	
 	
 6 	/ 	/A......{!''))-CCCCC;!777NNG   )''7788 {&D388VVdk6M6S6S6U6UVVVWWWW 	
(,(< ?C{?P '-  '/  '1	 
 '
3  [+H5  <?  $(;#9(#C  +0  '+k&8  <H  ,0;+A$+G   &$['--//3CCCEE 	
 !j 
!![)[)	
 

 
 
	K DEEEE 	 	 	LLFdk&@ F F F       	
 	BCCCG$$$$$s/   
 $6 A
A/L
 

M(MMc                    | j         S r5   )rv   r7   s    r   
embeddingszMyScale.embeddings   s    r   valuer   c                F     d                      fd|D                       S )Nrb   c              3  D   K   | ]}|j         v rj         | n|V  d S r5   )ru   rt   )r^   cr7   s     r   	<genexpr>z%MyScale.escape_str.<locals>.<genexpr>   s>      VV1!t/?*?*?$'1QVVVVVVr   )rq   )r7   r   s   ` r   
escape_strzMyScale.escape_str   s+    wwVVVVPUVVVVVVr   transacr   column_namesIterable[str]c           
          d                     |          }g }|D ]<}d                      fd|D                       }|                    d| d           =d j        j         d j        j         d| dd                     |           d	}|S )	NrZ   c                \    g | ](}d                      t          |                     d )S r\   )r   r   )r^   _nr7   s     r   ra   z'MyScale._build_istr.<locals>.<listcomp>   s7    FFFb9dooc"gg66999FFFr   ()z8
                INSERT INTO TABLE 
                    rc   z))
                VALUES
                z
                )rq   appendrI   r.   r0   )r7   r   r   ks_datani_strs   `      r   _build_istrzMyScale._build_istr   s    XXl## 	# 	#AFFFFAFFFGGALLQ""""[) ,0K,= @B  %	   r   c                f    |                      ||          }| j                            |           d S r5   )r   rx   ry   )r7   r   r   _i_strs       r   _insertzMyScale._insert   s3    !!'<88F#####r       texts	metadatasOptional[List[dict]]
batch_sizer    idsOptional[Iterable[str]]	List[str]c           	        |pd |D             }| j         j        }g }|d         ||d         ||d         t          | j        j        |          i}|pd |D             }t          t
          j        |          ||d         <   t          t          |          t          |          z
            dk    sJ t          |
                                 \  }	}
	 d}|                     t          |
 d	t          |          
          D ]}t          ||	                    | j         j        d                                      | j        k    sJ |                    |           t          |          |k    rD|r|                                 t!          | j        ||	g          }|                                 g }t          |          dk    r,|r|                                 |                     ||	           d |D             S # t&          $ rG}t(                              dt-          |           dt/          |           d           g cY d}~S d}~ww xY w)a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            ids: Optional list of ids to associate with the texts.
            batch_size: Batch size of insertion
            metadata: Optional column data to be inserted

        Returns:
            List of ids from adding the texts into the vectorstore.

        c                v    g | ]6}t          |                    d                                                     7S )r9   )r   encode	hexdigest)r^   ts     r   ra   z%MyScale.add_texts.<locals>.<listcomp>   s8    IIIad188G,,--7799IIIr   r(   r)   r*   c                    g | ]}i S r   r   )r^   r   s     r   ra   z%MyScale.add_texts.<locals>.<listcomp>  s    !4!4!4"!4!4!4r   r+   r   NzInserting data...)desctotal)targetr   c                    g | ]}|S r   r   )r^   is     r   ra   z%MyScale.add_texts.<locals>.<listcomp>  s    ###!A###r   	[91m[1m
[0m [95m[0m)rI   r,   maprv   rp   jsondumpsro   setziprr   ri   indexrs   r   rq   r   r   startrz   rm   errortyper   )r7   r   r   r   r   rK   colmap_r   r   keysvaluesr   r`   es                 r   	add_textszMyScale.add_texts   sb   * III5III+(DM3FOUHs4#3#?GG

 4!4!4e!4!4!4	,/
I,F,FWZ()3w<<#l"3"33449999L..001f	AZZV#6c)nn     
! 
! 1TZZ(>x(HIIJKKtxWWWWq!!!w<<:-- !dl'4IIIAGGIII G7||a FFHHHWd+++##s#### 	 	 	LLS477SSCFFSSSTTTIIIIII	s   D*G< <
I<IIIOptional[List[Dict[Any, Any]]]text_idsc                L     | ||fi |}|                     ||||           |S )aZ  Create Myscale wrapper with existing texts

        Args:
            texts (Iterable[str]): List or tuple of strings to be added
            embedding (Embeddings): Function to extract text embedding
            config (MyScaleSettings, Optional): Myscale configuration
            text_ids (Optional[Iterable], optional): IDs for the texts.
                                                     Defaults to None.
            batch_size (int, optional): Batchsize when transmitting data to MyScale.
                                        Defaults to 32.
            metadata (List[dict], optional): metadata to texts. Defaults to None.
            Other keyword arguments will pass into
                [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python#clickhouse-connect-driver-api)
        Returns:
            MyScale Index
        )r   r   r   )r   )	clsr   rH   r   rI   r   r   rK   ctxs	            r   
from_textszMyScale.from_texts  s<    6 c)V..v..ejIVVV
r   c                ~   d| j         j         d| j         j         d}|| j         j         d| j         j         dz  }|d| j         j         dz  }|dz  }| j                            d	| j         j         d| j         j                                                   D ]}|d
|d         dd|d         ddz  }|dz  }|S )zText representation for myscale, prints backends, username and schemas.
            Easy to use with `str(Myscale())`

        Returns:
            repr: string to show connection info and data schema
        z	[92m[1mrc   z @ :z[0m

z[1musername: z[0m

Table Schema:
z4---------------------------------------------------
zDESC z|[94mname24sz
[0m|[96mr   z[0m|
)	rI   r.   r0   r   r!   r#   rx   querynamed_results)r7   _reprrs      r   __repr__zMyScale.__repr__>  s     P$+"6OO9JOOODK$DDt{'7DDDDUT[%9UUUU "">DK(>>4;+<>>
 

-//	 	A TAfITTT1V9TTTTEE 	 r   q_embList[float]topk	where_strr"   c                8   d                     t          t          |                    }|rd| }nd}d| j        j        d          d| j        j        d          d| j        j         d	| j        j         d
| d| j        j        d          d| d| j         d| d
}|S )NrZ   	PREWHERE rb   
            SELECT r)   z, 
                r+   z, dist
            FROM rc   
            
            ORDER BY distance(r*   , []) 
                AS dist 
            LIMIT )rq   r   r   rI   r,   r.   r0   rw   r7   r   r   r   	q_emb_strq_strs         r   _build_qstrzMyScale._build_qstrR  s     HHSe__--	 	/I//IIIK*62 '
3  +&  *.):  	 
  ${5h? 
 EN       r      r   r_   List[Document]c                R     | j         | j                            |          ||fi |S )a  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of Documents
        )similarity_search_by_vectorrv   rp   )r7   r   r_   r   rK   s        r   similarity_searchzMyScale.similarity_searchf  s@    & 0t/((//I
 
AG
 
 	
r   c           	     L                          |||          }	  fd j                            |                                          D             S # t          $ rG}t
                              dt          |           dt          |           d           g cY d}~S d}~ww xY w)  Perform a similarity search with MyScale by vectors

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of (Document, similarity)
        c                    g | ]?}t          |j        j        d                   |j        j        d                            @S )r)   r+   page_contentr+   r   rI   r,   r^   r   r7   s     r   ra   z7MyScale.similarity_search_by_vector.<locals>.<listcomp>  s^       
 	 !"4;#9&#A!Bt{5jAB    r   r   r   r   N	r   rx   r   r   rz   rm   r   r   r   r7   rH   r_   r   rK   r   r   s   `      r   r   z#MyScale.similarity_search_by_vector}      .   Ay99
	   
 **511??AA     	 	 	LLS477SSCFFSSSTTTIIIIII	   7A 
B#<BB#B#List[Tuple[Document, float]]c           	     |                           j                            |          ||          }	  fd j                            |                                          D             S # t          $ rG}t                              dt          |           dt          |           d           g cY d}~S d}~ww xY w)/  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of documents most similar to the query text
            and cosine distance in float for each.
            Lower score represents more similarity.
        c                    g | ]G}t          |j        j        d                   |j        j        d                            |d         fHS )r)   r+   r   distr   r   s     r   ra   zCMyScale.similarity_search_with_relevance_scores.<locals>.<listcomp>  sl     	 	 	  %&t{'=f'E%F!"4;#9*#E!F   fI	 	 	r   r   r   r   Nr   rv   rp   rx   r   r   rz   rm   r   r   r   r7   r   r_   r   rK   r   r   s   `      r   'similarity_search_with_relevance_scoresz/MyScale.similarity_search_with_relevance_scores      *   !1!=!=e!D!DaSS		 	 	 	 **511??AA	 	 	 	  	 	 	LLS477SSCFFSSSTTTIIIIII	   7A* *
B;4<B60B;6B;c                n    | j                             d| j        j         d| j        j                    dS )z,
        Helper function: Drop data
        zDROP TABLE IF EXISTS rc   N)rx   ry   rI   r.   r0   r   s    r   dropzMyScale.drop  sE     	NDK$8NN4;;LNN	
 	
 	
 	
 	
r   Optional[List[str]]Optional[bool]c                X   ||
J d            g }|r]t          |          dk    rJd                    d |D                       }|                    | j        j        d          d| d           |r|                    |           t          |          dk    sJ d	                    |          }d
| j        j         d| j        j         d| }	 | j                            |           dS # t          $ r2}t                              t          |                     Y d}~dS d}~ww xY w)a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzIYou need to specify where to be deleted! Either with `ids` or `where_str`r   rY   c                    g | ]}d | d 	S r   r   )r^   r(   s     r   ra   z"MyScale.delete.<locals>.<listcomp>  s      9 9 9rR 9 9 9r   r(   z IN (r   z AND zDELETE FROM rc   z WHERE TF)ro   rq   r   rI   r,   r.   r0   rx   ry   rz   rm   r   r   )	r7   r   r   rK   condsid_listwhere_str_finalqstrr   s	            r   deletezMyScale.delete  sc   " KI--V .-- 	K3s88a<<ii 9 9S 9 9 9::GLLDK248IIwIIIJJJ 	$LL###5zzA~~~~!,,u--'4;/ ' '$+2C ' '$' ' 		K%%%4 	 	 	LLQ   55555	s   C- -
D)7'D$$D)c                &    | j         j        d         S )Nr+   )rI   r,   r   s    r   metadata_columnzMyScale.metadata_column  s    {%j11r   r5   )rH   r   rI   rJ   rK   r   r   rL   )r   r   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   rL   )Nr   N)r   r   r   r   r   r    r   r   rK   r   r   r   )NNNr   )r   r   rH   r   r   r   rI   rJ   r   r   r   r    rK   r   r   rG   r   r   r   r   r   r    r   r"   r   r   r   N)
r   r   r_   r    r   r"   rK   r   r   r   
rH   r   r_   r    r   r"   rK   r   r   r   
r   r   r_   r    r   r"   rK   r   r   r   )r   rL   )NN)r   r   r   r"   rK   r   r   r   )r@   rA   rB   rC   rk   propertyr   r   r   r   r   classmethodr   r   r   r   r   r   r   r  r  __classcell__r   s   @r   rG   rG   `   s          -1^% ^% ^% ^% ^% ^% ^%@       X W W W W   $ $ $ $ +/'+6 6 6 6 6p 
 59,0,0    [<   * IM    * BF
 
 
 
 
4 #'	" " " " "J BF# # # # #J
 
 
 
 $(#'$ $ $ $ $L 2 2 2 X2 2 2 2 2r   rG   c                  h     e Zd ZdZdg fd fdZ	 dd dZ	 	 d!d"dZ	 d!d#dZed$d            Z	 xZ
S )%MyScaleWithoutJSONzsMyScale vector store without metadata column

    This is super handy if you are working to a SQL-native table
    NrH   r   rI   rJ   must_have_colsr   rK   r   r   rL   c                L     t                      j        ||fi | || _        dS )ag  Building a myscale vector store without metadata column

        embedding (Embeddings): embedding model
        config (MyScaleSettings): Configuration to MyScale Client
        must_have_cols (List[str]): column names to be included in query
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        N)rj   rk   r  )r7   rH   rI   r  rK   r   s        r   rk   zMyScaleWithoutJSON.__init__  s4     	F55f555)7r   r   r   r   r    r   r"   r   c                H   d                     t          t          |                    }|rd| }nd}d| j        j        d          dd                     | j                   d| j        j         d| j        j         d	| d
| j        j        d          d| d| j         d| d	}|S )NrZ   r   rb   r   r)   z, dist, 
                z
            FROM rc   r   r   r*   r   r   r   )	rq   r   r   rI   r,   r  r.   r0   rw   r   s         r   r   zMyScaleWithoutJSON._build_qstr  s     HHSe__--	 	/I//IIIK*62 $-..  +&  *.):  	 
  ${5h? 
 EN       r   r   r_   r   c           	     L                          |||          }	  fd j                            |                                          D             S # t          $ rG}t
                              dt          |           dt          |           d           g cY d}~S d}~ww xY w)r   c                    g | ]:t          j        j        d                   fdj        D                       ;S )r)   c                "    i | ]}||         S r   r   r^   r_   r   s     r   
<dictcomp>zMMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>.<dictcomp>A  s    CCC!a1CCCr   r   r   rI   r,   r  r   s    @r   ra   zBMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>>  sg       
 	 !"4;#9&#A!BCCCCt/BCCC    r   r   r   r   Nr   r   s   `      r   r   z.MyScaleWithoutJSON.similarity_search_by_vector%  r   r   r   r   c           	     |                           j                            |          ||          }	  fd j                            |                                          D             S # t          $ rG}t                              dt          |           dt          |           d           g cY d}~S d}~ww xY w)r   c                    g | ]Bt          j        j        d                   fdj        D                       d         fCS )r)   c                "    i | ]}||         S r   r   r  s     r   r  zYMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>.<dictcomp>d  s    !G!G!Ga!QqT!G!G!Gr   r   r   r  r   s    @r   ra   zNMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>`  su     	 	 	  %&t{'=f'E%F!G!G!G!G43F!G!G!G   fI	 	 	r   r   r   r   Nr   r   s   `      r   r   z:MyScaleWithoutJSON.similarity_search_with_relevance_scoresI  r   r   c                    dS )Nrb   r   r   s    r   r  z"MyScaleWithoutJSON.metadata_columnn  s    rr   )
rH   r   rI   rJ   r  r   rK   r   r   rL   r5   r  r  r	  r
  r  )r@   rA   rB   rC   rk   r   r   r   r  r  r  r  s   @r   r  r    s          -1$&	8 8 8 8 8 8 8& IM    . #'	" " " " "J BF# # # # #J    X    r   r  )r   r   r   r   r   r   )
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r	   r
   r   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   pydantic_settingsr   r   	getLoggerrm   r   r   rG   r  r   r   r   <module>r(     s   " " " " " "               = = = = = = = = = = = = = = = = - - - - - - 0 0 0 0 0 0 3 3 3 3 3 3 > > > > > > > >				    < < < < <l < < <~V2 V2 V2 V2 V2k V2 V2 V2rw w w w w w w w w wr   