
    Ng_                         d dl Z d dlmZmZ d dlmZmZmZ d dlZ	d dl
mZmZ d dlmZ d dlmZmZ d dlmZ dZd	Zd
ZdZdZdZerd dlmZ d dlmZ  G d de          Ze G d de                      ZdS )    N)	dataclassfield)TYPE_CHECKINGListOptional)Field	SecretStr)Element)BaseEmbeddingEncoderEmbeddingConfig)requires_dependenciesz@mixedbread-ai/unstructured   <      floatendMixedbreadAIRequestOptionsc                       e Zd ZU dZ ed           Zeed<    ed          Ze	ed<    e
dgd	
          dd            ZdS )MixedbreadAIEmbeddingConfigz
    Configuration class for Mixedbread AI Embedding Encoder.

    Attributes:
        api_key (str): API key for accessing Mixedbread AI..
        model_name (str): Name of the model to use for embeddings.
    c                  Z    t          t          j                            d                    S )NMXBAI_API_KEY)r	   osenvironget     [/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/embed/mixedbreadai.py<lambda>z$MixedbreadAIEmbeddingConfig.<lambda>#   s    	"*..*I*I J J r   )default_factoryapi_keyz"mixedbread-ai/mxbai-embed-large-v1)default
model_namemixedbread_aizembed-mixedbreadai)extrasreturnr   c                 T    ddl m}  || j                                                  S )zr
        Create the Mixedbread AI client.

        Returns:
            MixedbreadAI: Initialized client.
        r   r   )r#   )mixedbread_ai.clientr   r#   get_secret_value)selfr   s     r    
get_clientz&MixedbreadAIEmbeddingConfig.get_client*   s@     	655555|L1133
 
 
 	
r   N)r(   r   )__name__
__module____qualname____doc__r   r#   r	   __annotations__r%   strr   r-   r   r   r    r   r      s           JJ  GY    e4  J    	#  
 
 
	 
 
 
r   r   c                      e Zd ZU dZeed<    edd          Zee	e
                  ed<    edd          Zed         ed<   d	e	e
         fd
Zd Zed             Zed	efd            Zde	e         d	e	e	e
                  fdZede	e         de	e	e
                  d	e	e         fd            Zde	e         d	e	e         fdZded	e	e
         fdZdS )MixedbreadAIEmbeddingEncoderz
    Embedding encoder for Mixedbread AI.

    Attributes:
        config (MixedbreadAIEmbeddingConfig): Configuration for the embedding encoder.
    configFN)initr$   _exemplary_embeddingr   _request_optionsr(   c                 :    |                      dg          d         S )zJGet an exemplary embedding to determine dimensions and unit vector status.Qr   _embed)r,   s    r    get_exemplary_embeddingz4MixedbreadAIEmbeddingEncoder.get_exemplary_embeddingJ   s    {{C5!!!$$r   c                     | j         j        t          d          ddlm}  |t
          t          dt          i          | _        d S )NzThe Mixedbread AI API key must be specified.You either pass it in the constructor using 'api_key'or via the 'MXBAI_API_KEY' environment variable.r   r   z
User-Agent)max_retriestimeout_in_secondsadditional_headers)	r6   r#   
ValueErrormixedbread_ai.corer   MAX_RETRIESTIMEOUT
USER_AGENTr9   )r,   r   s     r    
initializez'MixedbreadAIEmbeddingEncoder.initializeN   sf    ;&E   	655555 .#& ,j9!
 !
 !
r   c                 R    |                                  }t          j        |          S )z0Get the number of dimensions for the embeddings.)r>   npshaper,   exemplary_embeddings     r    num_of_dimensionsz.MixedbreadAIEmbeddingEncoder.num_of_dimensions^   s'     #::<<x+,,,r   c                     |                                  }t          j        t          j                            |          d          S )z(Check if the embedding is a unit vector.g      ?)r>   rJ   iscloselinalgnormrL   s     r    is_unit_vectorz+MixedbreadAIEmbeddingEncoder.is_unit_vectord   s7     #::<<z")..)<==sCCCr   textsc           	      P   t           }t          dt          |          |          }g }| j                                        }|D ]X}||||z            }|                    | j        j        dt          t          | j	        |          }|
                    |           Yd |D             S )z
        Embed a list of texts using the Mixedbread AI API.

        Args:
            texts (List[str]): List of texts to embed.

        Returns:
            List[List[float]]: List of embeddings.
        r   T)model
normalizedencoding_formattruncation_strategyrequest_optionsinputc                 0    g | ]}|j         D ]	}|j        
S r   )data	embedding).0responseitems      r    
<listcomp>z7MixedbreadAIEmbeddingEncoder._embed.<locals>.<listcomp>   s)    SSS8X]SSTSSSSr   )
BATCH_SIZErangelenr6   r-   
embeddingsr%   ENCODING_FORMATTRUNCATION_STRATEGYr9   append)	r,   rT   
batch_size	batch_itr	responsesclientibatchr`   s	            r    r=   z#MixedbreadAIEmbeddingEncoder._embedj   s      
!SZZ44		'')) 
	' 
	'A!a*n,-E((k, /$7 $ 5 )  H X&&&&SSySSSSr   elementsrf   c                     t          |           t          |          k    sJ g }t          |           D ]'\  }}||         |_        |                    |           (| S )a  
        Add embeddings to elements.

        Args:
            elements (List[Element]): List of elements.
            embeddings (List[List[float]]): List of embeddings.

        Returns:
            List[Element]: Elements with embeddings added.
        )re   	enumeraterf   ri   )rp   rf   elements_w_embeddingrn   elements        r    _add_embeddings_to_elementsz8MixedbreadAIEmbeddingEncoder._add_embeddings_to_elements   sj     8}}J////!#H-- 	1 	1JAw!+AG ''0000r   c                 l    |                      d |D                       }|                     ||          S )z
        Embed a list of document elements.

        Args:
            elements (List[Element]): List of document elements.

        Returns:
            List[Element]: Elements with embeddings.
        c                 ,    g | ]}t          |          S r   )r3   )r_   es     r    rb   z@MixedbreadAIEmbeddingEncoder.embed_documents.<locals>.<listcomp>   s    !;!;!;Q#a&&!;!;!;r   )r=   ru   )r,   rp   rf   s      r    embed_documentsz,MixedbreadAIEmbeddingEncoder.embed_documents   s:     [[!;!;(!;!;!;<<
//*EEEr   queryc                 :    |                      |g          d         S )z
        Embed a query string.

        Args:
            query (str): Query string to embed.

        Returns:
            List[float]: Embedding of the query.
        r   r<   )r,   rz   s     r    embed_queryz(MixedbreadAIEmbeddingEncoder.embed_query   s     {{E7##A&&r   )r.   r/   r0   r1   r   r2   r   r8   r   r   r   r9   r>   rH   propertyrN   boolrS   r3   r=   staticmethodr
   ru   ry   r|   r   r   r    r5   r5   <   s          ('''27%UD2Q2Q2Q(4;/QQQ385eT3R3R3Rh/0RRR%e % % % %
 
 
  - - X-
 D D D D XD
TDI T$tE{*; T T T T8 w--1$u+->	g   \(FW F$w- F F F F
' 
'e 
' 
' 
' 
' 
' 
'r   r5   ) r   dataclassesr   r   typingr   r   r   numpyrJ   pydanticr   r	   unstructured.documents.elementsr
   unstructured.embed.interfacesr   r   unstructured.utilsr   rG   rc   rF   rE   rg   rh   r*   r   rD   r   r   r5   r   r   r    <module>r      sg   				 ( ( ( ( ( ( ( ( 0 0 0 0 0 0 0 0 0 0     % % % % % % % % 3 3 3 3 3 3 O O O O O O O O 4 4 4 4 4 4*


   2111111111111 
  
  
  
  
/  
  
  
F u' u' u' u' u'#7 u' u' u' u' u'r   