
    Ng9                        d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ  e j        e          Zd
ZdZdZ eddd           G d dee                      Z dS )    N)ThreadPoolExecutorwait)AnyDictListLiteralOptionalTuple)
deprecated)
Embeddings)create_base_retry_decorator)pre_init)_VertexAICommon)raise_vertex_import_errori N        z0.0.12z1.0z,langchain_google_vertexai.VertexAIEmbeddings)sinceremovalalternative_importc                       e Zd ZU dZi Zeeef         ed<   dZ	e
ed<   	 ededefd            Z	 	 	 	 	 	 d#dedee         dedededee         def fdZededee         fd            Zedee         dedeee                  fd            Z	 d$dee         dee         deee                  fdZ	 d$dee         dee         deeee                  eee                  f         fdZ	 	 d%dee         dedeed                  deee                  fd Z	 d&dee         dedeee                  fd!Zdedee         fd"Z xZS )'VertexAIEmbeddingsz'Google Cloud VertexAI embedding models.instanceFshow_progress_barvaluesreturnc                    |                      |           |d         dk    rt                              d           d|d<   	 ddlm} n# t
          $ r t                       Y nw xY w|                    |d                   |d<   |S )z8Validates that the python package exists in environment.
model_nametextembedding-gecko-defaultzModel_name will become a required arg for VertexAIEmbeddings starting from Feb-01-2024. Currently the default is set to textembedding-gecko@001ztextembedding-gecko@001r   )TextEmbeddingModelclient)_try_init_vertexailoggerwarningvertexai.language_modelsr   ImportErrorr   from_pretrained)clsr   r   s      c/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/embeddings/vertexai.pyvalidate_environmentz'VertexAIEmbeddings.validate_environment$   s     	v&&&,#@@@NN*  
 $=F< 	(CCCCCCC 	( 	( 	(%'''''	(-==f\>RSSxs   A	 	A$#A$r   Nus-central1r      r   projectlocationrequest_parallelismmax_retriescredentialskwargsc           
          t                      j        d||||||d| |                    dt                    | j        d<   | j        d         | j        d<   |                    dt
                    | j        d<   | j        d         | j        d<   t          j                    | j        d<   d| j        d<   t          |	          | j        d
<   | j	        j
                            d           | j        d<   dS )z$Initialize the sentence_transformer.)r,   r-   r0   r.   r/   r   max_batch_size
batch_sizemin_batch_sizemin_good_batch_sizelockFbatch_size_validated)max_workerstask_executorz/textembedding-gecko@001embeddings_task_type_supportedN )super__init__get_MAX_BATCH_SIZEr   _MIN_BATCH_SIZE	threadingLockr   r    _endpoint_nameendswith)	selfr   r,   r-   r.   r/   r0   r1   	__class__s	           r(   r>   zVertexAIEmbeddings.__init__6   s    	 	
# 3#!	
 	
 	
 	
 	
 +1**5E*W*W&'&*m4D&El#*0**5E*W*W&'/3}=M/N+, ) 0 0f05,-);+*
 *
 *
o&
 *334NOOO 	,	
 	
 	
    textc                 j    t           j        dz   }d| d}d t          j        ||           D             S )z9Splits a string by punctuation and whitespace characters.z	
 z([z])c                     g | ]}||S r<   r<   ).0segments     r(   
<listcomp>z<VertexAIEmbeddings._split_by_punctuation.<locals>.<listcomp>^   s    JJJG'JJJJrH   )stringpunctuationresplit)rI   split_bypatterns      r(   _split_by_punctuationz(VertexAIEmbeddings._split_by_punctuationX   sA     %/#x###JJrx'>'>JJJJrH   textsr4   c                    d}t          |           }d}g }g }|dk    rg S ||k     r| |         }t          t                              |                    dz  }d}	|t          k    r3t          |          dk    r|                    |           |g}|dz  }d}	nN||z   t          k    st          |          |k    rd}	n*||dz
  k    rd}	||z  }|                    |           |dz  }|	r|                    |           g }d}||k     |S )zlSplits texts in batches based on current maximum batch size
        and maximum tokens per request.
        r      F   T)lenr   rU   _MAX_TOKENS_PER_BATCHappend)
rV   r4   
text_index	texts_lenbatch_token_lenbatchescurrent_batchcurrent_textcurrent_text_token_cntend_of_batchs
             r(   _prepare_batchesz#VertexAIEmbeddings._prepare_batches`   sa   
 
JJ	#%#%>>I9$$ ,L &<<\JJKKaO # !L%(=== }%%))NN=111!-a
#"88;PPP}%%33#Q.. $(L#99$$\222a
 $}--- ""#K 9$$L rH   embeddings_typec                      ddl m}m}m}m} ||||g}t          | j                  }|dt          t                   dt          f fd            }	 |	|          S )z1Makes a Vertex AI model request with retry logic.r   )AbortedDeadlineExceededResourceExhaustedServiceUnavailable)error_typesr/   texts_to_processr   c                     r#j         d         rddlm fd| D             }n| }j                            |          }d |D             S )Nr;   r   )TextEmbeddingInputc                 *    g | ]} |           S ))rI   	task_typer<   )rL   tro   rf   s     r(   rN   zaVertexAIEmbeddings._get_embeddings_with_retry.<locals>._completion_with_retry.<locals>.<listcomp>   s9        '&AIII  rH   c                     g | ]	}|j         
S r<   )r   )rL   embss     r(   rN   zaVertexAIEmbeddings._get_embeddings_with_retry.<locals>._completion_with_retry.<locals>.<listcomp>   s    777DDK777rH   )r   r$   ro   r    get_embeddings)rm   requests
embeddingsro   rf   rF   s      @r(   _completion_with_retryzMVertexAIEmbeddings._get_embeddings_with_retry.<locals>._completion_with_retry   s     ,4=1Q#R ,GGGGGG    -  
 ,33H==J77J7777rH   )
google.api_core.exceptionsrh   ri   rj   rk   r   r/   r   strr   )
rF   rV   rf   rh   ri   rj   rk   errorsretry_decoratorrx   s
   ` `       r(   _get_embeddings_with_retryz-VertexAIEmbeddings._get_embeddings_with_retry   s    	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 6(
 
 

 
	8T#Y 	83 	8 	8 	8 	8 	8 	8 
	8 &%e,,,rH   c           	      >   ddl m} t                              || j        d                   }t          |d                   | j        d         k    rg |fS | j        d         5  | j        d         rht          |d                   | j        d         k    rg |fcddd           S g t                              || j        d                   fcddd           S |d         }g }d}	 	 |                     ||          }nd# |$ r[ d	}t          |          }|| j        d
         k    r t          | j        d
         t          |dz                      }|d|         }Y nw xY w|t          |          }t          | j        d         |          | j        d<   |s|| j        d         k    rT|| j        d<   d	| j        d<   || j        d         k    r.t                              ||d         | j        d                   }n
|dd         }ddd           n# 1 swxY w Y   ||fS )a  Prepares text batches with one-time validation of batch size.
        Batch size varies between GCP regions and individual project quotas.
        # Returns embeddings of the first text batch that went through,
        # and text batches for the rest of the texts.
        r   )InvalidArgumentr4   r6   r7   r8   NFTr5   rX   r3   rY   )	ry   r   r   re   r   rZ   r}   maxint)	rF   rV   rf   r   r`   first_batchfirst_resulthad_failurefirst_batch_lens	            r(   _prepare_and_validate_batchesz0VertexAIEmbeddings._prepare_and_validate_batches   s    	?>>>>>$554=.
 

 wqz??dm,ABBBw;]6" 1	& 1	& }34 wqz??dmL&AAAw;1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1BBt}\:    1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& "!*KLK@@#'#B#B#_$ $L & @ @ @"&K&)+&6&6O&$-8H*III&)&67_q=P9Q9Q' 'O #..>.>"?KKK@@ "+..O3634o4 4DM/0
  &o?O1PPP.=l+8<45 #dm4D&EEE0AAo../|1L G
 "!""+c1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	& 1	&h W$$sD   "5H$'HH&C=<H=AEHEB'HHHr   embeddings_task_type)RETRIEVAL_QUERYRETRIEVAL_DOCUMENTSEMANTIC_SIMILARITYCLASSIFICATION
CLUSTERINGc                    t          |          dk    rg S g }g }|dk    rt                              ||          }n|                     ||          \  }}|                    |           g }| j        rA	 ddlm}  ||d          }	n.# t          $ r t          	                    d           |}	Y nw xY w|}	|	D ]=}
|
                    | j        d                             | j        |
|                     >t          |          dk    rt          |           |D ])}|                    |                                           *|S )a/  Embed a list of strings.

        Args:
            texts: List[str] The list of strings to embed.
            batch_size: [int] The batch size of embeddings to send to the model.
                If zero, then the largest batch size will be detected dynamically
                at the first request, starting from 250, down to 5.
            embeddings_task_type: [str] optional embeddings task type,
                one of the following
                    RETRIEVAL_QUERY	- Text is a query
                                      in a search/retrieval setting.
                    RETRIEVAL_DOCUMENT - Text is a document
                                         in a search/retrieval setting.
                    SEMANTIC_SIMILARITY - Embeddings will be used
                                          for Semantic Textual Similarity (STS).
                    CLASSIFICATION - Embeddings will be used for classification.
                    CLUSTERING - Embeddings will be used for clustering.

        Returns:
            List of embeddings, one for each text.
        r   )tqdmr   )desczgUnable to show progress bar because tqdm could not be imported. Please install with `pip install tqdm`.r:   )rV   rf   )rZ   r   re   r   extendr   r   r%   r"   r#   r\   r   submitr}   r   result)rF   rV   r4   r   rw   first_batch_resultr`   tasksr   iter_batchrr   s               r(   embedzVertexAIEmbeddings.embed  s   F u::??I(*
02>>(99%LLGG +/*L*L++ +'
 	,---! 		 %%%%%%W+?@@@      >      E 	 	ELLo.553$8 6      u::>>KKK 	* 	*Aahhjj))))s   4B &B10B1c                 0    |                      ||d          S )a  Embed a list of documents.

        Args:
            texts: List[str] The list of texts to embed.
            batch_size: [int] The batch size of embeddings to send to the model.
                If zero, then the largest batch size will be detected dynamically
                at the first request, starting from 250, down to 5.

        Returns:
            List of embeddings, one for each text.
        r   r   )rF   rV   r4   s      r(   embed_documentsz"VertexAIEmbeddings.embed_documentsO  s     zz%-ABBBrH   c                 B    |                      |gdd          }|d         S )zEmbed a text.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        rY   r   r   r   )rF   rI   rw   s      r(   embed_queryzVertexAIEmbeddings.embed_query_  s&     ZZ+<==
!}rH   )r   Nr*   r   r+   N)N)r   N)r   )__name__
__module____qualname____doc__r   r   rz   r   __annotations__r   boolr   r)   r	   r   r>   staticmethodr   rU   re   floatr}   r
   r   r   r   r   r   __classcell__)rG   s   @r(   r   r      s"         21  "Hd38n!!!#t###J$ 4    X( 8!%%#$%) P  P  P #	 P
  P ! P  P c] P  P  P  P  P  P  PD KC KDI K K K \K 1S	 1s 1tDI 1 1 1 \1h BF$- $-#Y$-19#$-	d5k	$- $- $- $-N BFE% E%#YE%19#E%	tDK $tCy/1	2E% E% E% E%T  L LCyL L '
	L 
d5k	L L L L^ 34C C#YC,/C	d5k	C C C C 
 
U 
 
 
 
 
 
 
 
rH   r   )!loggingrQ   rO   rB   concurrent.futuresr   r   typingr   r   r   r   r	   r
   langchain_core._api.deprecationr   langchain_core.embeddingsr   #langchain_core.language_models.llmsr   langchain_core.utilsr   !langchain_community.llms.vertexair   &langchain_community.utilities.vertexair   	getLoggerr   r"   r[   r@   rA   r   r<   rH   r(   <module>r      sk    				      7 7 7 7 7 7 7 7 < < < < < < < < < < < < < < < < 6 6 6 6 6 6 0 0 0 0 0 0 K K K K K K ) ) ) ) ) ) = = = = = = L L L L L L		8	$	$  
E  
M M M M M* M M 
M M MrH   