
    Ng=                     Z   d dl Z d dlmZmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d	ed
efdZddd
efdZeeeee         df         Z	  G d ded          Z G d ded          Z  G d ded          Z! G d ded          Z" G d ded          Z# G d ded          Z$ G d deed          Z% G d de%          Z& G d  d!e%          Z' G d" d#ed          Z( G d$ d%ed          Z)d&d'd(d)d*d+Z* G d, d-e          Z+dS ).    N)ABCabstractmethod)AnyCallableDictListLiteralOptionalSequenceUnion)CallbackManagerForRetrieverRun)Document)BaseRetriever)	BaseModelFieldmodel_validator	validator)	Annotatedexcerptreturnc                 b    | s| S t          j        dd|                               dd          }|S )zClean an excerpt from Kendra.

    Args:
        excerpt: The excerpt to clean.

    Returns:
        The cleaned excerpt.

    z\s+ z... )resubreplace)r   ress     a/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/retrievers/kendra.pyclean_excerptr      s9      
&g
&
&
.
.ub
9
9CJ    item
ResultItemc                     d}|                                  }|r	|d| dz  }t          |                                           }|r	|d| dz  }|S )zCombine a ResultItem title and excerpt into a single string.

    Args:
        item: the ResultItem of a Kendra search.

    Returns:
        A combined text of the title and excerpt of the given item.

    r   zDocument Title: 
zDocument Excerpt: 
)	get_titler   get_excerpt)r!   texttitler   s       r   combined_textr)   *   sr     DNNE -,5,,,,D,,..//G 32w2222Kr    c                   Z    e Zd ZU dZeed<   	 eed<   	 ee         ed<   	 ee         ed<   dS )	Highlightz8Information that highlights the keywords in the excerpt.BeginOffset	EndOffset	TopAnswerTypeN)	__name__
__module____qualname____doc__int__annotations__r
   boolstr r    r   r+   r+   F   sV         BBLNNNJ~7
3-<<r    r+   allow)extrac                   6    e Zd ZU dZeed<   	 ee         ed<   dS )TextWithHighLightszText with highlights.Text
HighlightsN)r0   r1   r2   r3   r7   r5   r
   r   r8   r    r   r<   r<   T   s4         
IIIr    r<   c                       e Zd ZU dZeed<   dS )AdditionalResultAttributeValuez(Value of an additional result attribute.TextWithHighlightsValueN)r0   r1   r2   r3   r<   r5   r8   r    r   r@   r@   ^   s&          32////))r    r@   c                   P    e Zd ZU dZeed<   	 ed         ed<   	 eed<   	 defdZdS )	AdditionalResultAttributezAdditional result attribute.KeyTEXT_WITH_HIGHLIGHTS_VALUE	ValueTypeValuer   c                 $    | j         j        j        S N)rG   rA   r=   selfs    r   get_value_textz(AdditionalResultAttribute.get_value_textr   s    z166r    N)	r0   r1   r2   r3   r7   r5   r	   r@   rL   r8   r    r   rC   rC   h   sg         &&	HHH#34444 ))))%7 7 7 7 7 7 7r    rC   c                       e Zd ZU dZee         ed<   	 ee         ed<   	 eee                  ed<   	 ee         ed<   	 e	de
fd            ZdS )	DocumentAttributeValuezValue of a document attribute.	DateValue	LongValueStringListValueStringValuer   c                 v    | j         r| j         S | j        r| j        S | j        r| j        S | j        r| j        S dS )zThe only defined document attribute value or None.
        According to Amazon Kendra, you can only provide one
        value for a document attribute.
        N)rO   rP   rQ   rR   rJ   s    r   valuezDocumentAttributeValue.value   sU     > 	">!> 	">! 	('' 	$##tr    N)r0   r1   r2   r3   r
   r7   r5   r4   r   propertyDocumentAttributeValueTyperT   r8   r    r   rN   rN   w   s         ((}3}d3i(((( #1    X  r    rN   c                   *    e Zd ZU dZeed<   	 eed<   dS )DocumentAttributezDocument attribute.rD   rG   N)r0   r1   r2   r3   r7   r5   rN   r8   r    r   rX   rX      s0         	HHH#!!!!%%r    rX   c                   .   e Zd ZU dZee         ed<   	 ee         ed<   	 ee         ed<   	 g Zeee	                  ed<   	 ee
         ed<   	 edefd            Zedefd	            Zde
fd
Zdeeef         fdZdefdZefded gef         defdZdS )r"   zBase class of a result item.Id
DocumentIdDocumentURIDocumentAttributesScoreAttributesr   c                     dS )zDocument title.Nr8   rJ   s    r   r%   zResultItem.get_title         r    c                     dS )zDDocument excerpt or passage original content as retrieved by Kendra.Nr8   rJ   s    r   r&   zResultItem.get_excerpt   r`   r    c                     i S )zDocument additional metadata dict.
        This returns any extra metadata except these:
            * result_id
            * document_id
            * source
            * title
            * excerpt
            * document_attributes
        r8   rJ   s    r   get_additional_metadataz"ResultItem.get_additional_metadata   s	     	r    c                 (    d | j         pg D             S )zDocument attributes dict.c                 2    i | ]}|j         |j        j        S r8   )rD   rG   rT   ).0attrs     r   
<dictcomp>z;ResultItem.get_document_attributes_dict.<locals>.<dictcomp>   s!    WWWt$**WWWr    )r]   rJ   s    r   get_document_attributes_dictz'ResultItem.get_document_attributes_dict   s    WW8O8USUWWWWr    c                 .    | j         | j         d         S dS )zDocument Score ConfidenceNScoreConfidenceNOT_AVAILABLE)r^   rJ   s    r   get_score_attributezResultItem.get_score_attribute   s    +'(9::"?r    page_content_formatterc           
      J    ||           }|                                  }|                    | j        | j        | j        |                                 |                                 |                                 |                                 d           t          ||          S )z!Converts this item to a Document.)	result_iddocument_idsourcer(   r   document_attributesscore)page_contentmetadata)
rc   updaterZ   r[   r\   r%   r&   ri   rm   r   )rK   rn   ru   rv   s       r   to_doczResultItem.to_doc   s     .-d33//11!W#*))++--'+'H'H'J'J1133 
	
 
	
 
	
 \HEEEEr    N)r0   r1   r2   r3   r
   r7   r5   r]   r   rX   dictr   r%   r&   rc   r   rV   ri   rm   r)   r   r   rx   r8   r    r   r"   r"      s~        &&-#<>&7!89>>>"d^###%3    ^ SS S S S ^S
 
 
 
 
Xd38R3R.S X X X X#S # # # # GTF F&.~s/B&CF	F F F F F Fr    c                       e Zd ZU dZeed<   	 ee         ed<   	 ee         ed<   	 ee         ed<   	 g Zee	e
                  ed<   	 ee         ed<   	 defd	Zdefd
ZdefdZdefdZdS )QueryResultItemzQuery API result item.DocumentTitleFeedbackTokenFormatr/   AdditionalAttributesDocumentExcerptr   c                     | j         j        S rI   )r|   r=   rJ   s    r   r%   zQueryResultItem.get_title   s    !&&r    c                 p    | j         sdS | j         d         sdS | j         d                                         S )Nr   r   )r   rL   rJ   s    r   get_attribute_valuez#QueryResultItem.get_attribute_value   sB    ( 	2(+ 	A2,Q/>>@@@r    c                     | j         r+| j         d         j        dk    r|                                 }n| j        r| j        j        }nd}|S )Nr   
AnswerTextr   )r   rD   r   r   r=   )rK   r   s     r   r&   zQueryResultItem.get_excerpt   sX    %	)!,0L@@..00GG! 	*/GGGr    c                     d| j         i}|S )Ntype)r/   )rK   additional_metadatas     r   rc   z'QueryResultItem.get_additional_metadata  s    %ty1""r    N)r0   r1   r2   r3   r<   r5   r
   r7   r   r   rC   r%   r   r&   ry   rc   r8   r    r   r{   r{      s          %%%%C=   ASM
 3-?FH(4(A#BCHHHG01111''3 ' ' ' 'AS A A A AS    # # # # # # #r    r{   c                   \    e Zd ZU dZee         ed<   	 ee         ed<   	 defdZdefdZdS )RetrieveResultItemzRetrieve API result item.r|   Contentr   c                     | j         pdS Nr   )r|   rJ   s    r   r%   zRetrieveResultItem.get_title  s    !'R'r    c                     | j         pdS r   )r   rJ   s    r   r&   zRetrieveResultItem.get_excerpt  s    |!r!r    N)	r0   r1   r2   r3   r
   r7   r5   r%   r&   r8   r    r   r   r     sx         ##C=   c]"(3 ( ( ( ("S " " " " " "r    r   c                   *    e Zd ZU dZee         ed<   dS )QueryResulta  `Amazon Kendra Query API` search result.

    It is composed of:
        * Relevant suggested answers: either a text excerpt or table excerpt.
        * Matching FAQs or questions-answer from your FAQ file.
        * Documents including an excerpt of each document with its title.
    ResultItemsN)r0   r1   r2   r3   r   r{   r5   r8   r    r   r   r   "  s/           o&&&&r    r   c                   6    e Zd ZU dZeed<   	 ee         ed<   dS )RetrieveResultz`Amazon Kendra Retrieve API` search result.

    It is composed of:
        * relevant passages or text excerpts given an input query.
    QueryIdr   N)r0   r1   r2   r3   r7   r5   r   r   r8   r    r   r   r   0  s<           LLL())))r    r           g      ?g      ?g      ?      ?)rl   LOWMEDIUMHIGH	VERY_HIGHc                   2   e Zd ZU dZeed<   dZee         ed<   dZee         ed<   dZ	e
ed<   dZee         ed<   dZeee                  ed	<   eZeegef         ed
<   eed<   dZee         ed<   eee          edd          f         ed<    ed          de
de
fd            Z ed          edeeef         defd                        Zdedee         fdZdee         dee         fdZ dee         dee         fdZ!dede"dee         fdZ#dS ) AmazonKendraRetrievera  `Amazon Kendra Index` retriever.

    Args:
        index_id: Kendra index id

        region_name: The aws region e.g., `us-west-2`.
            Fallsback to AWS_DEFAULT_REGION env variable
            or region specified in ~/.aws/config.

        credentials_profile_name: The name of the profile in the ~/.aws/credentials
            or ~/.aws/config files, which has either access keys or role information
            specified. If not specified, the default credential profile or, if on an
            EC2 instance, credentials from IMDS will be used.

        top_k: No of results to return

        attribute_filter: Additional filtering of results based on metadata
            See: https://docs.aws.amazon.com/kendra/latest/APIReference

        document_relevance_override_configurations: Overrides relevance tuning
            configurations of fields/attributes set at the index level
            See: https://docs.aws.amazon.com/kendra/latest/APIReference

        page_content_formatter: generates the Document page_content
            allowing access to all result item attributes. By default, it uses
            the item's title and excerpt.

        client: boto3 client for Kendra

        user_context: Provides information about the user context
            See: https://docs.aws.amazon.com/kendra/latest/APIReference

    Example:
        .. code-block:: python

            retriever = AmazonKendraRetriever(
                index_id="c0806df7-e76b-4bce-9b5c-d5582f6b1a03"
            )

    index_idNregion_namecredentials_profile_name   top_kattribute_filter*document_relevance_override_configurationsrn   clientuser_contextr   r   )gelemin_score_confidencerT   r   c                 8    |dk     rt          d| d          |S )Nr   top_k () cannot be negative.)
ValueError)clsrT   s     r   validate_top_kz$AmazonKendraRetriever.validate_top_k{  s*    199CuCCCDDDr    before)modevaluesc                    |                     d          }||dk     rt          d| d          |                     d          |S 	 dd l}|                     d          r|                    |d                   }n|                                }i }|                     d          r|d         |d<    |j        di ||d<   |S # t
          $ r t          d
          t          $ r}t          d          |d }~ww xY w)Nr   r   r   r   r   r   )profile_namer   kendrazRCould not import boto3 python package. Please install it with `pip install boto3`.zCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.)r   )getr   boto3Sessionr   ImportError	Exception)r   r   r   r   sessionclient_paramses          r   create_clientz#AmazonKendraRetriever.create_client  sQ    

7##CuCCCDDD::h+M	LLLzz455 *--V<V5W-XX  --//Mzz-(( E/5m/Dm,-w~HH-HHF8M 	 	 	>    	 	 	*  		s   	A=C "C>)C99C>queryc                    | j         |                                dd         | j        d}| j        
| j        |d<   | j        
| j        |d<   | j        
| j        |d<    | j        j        di |}t          	                    |          }|j
        r|j
        S  | j        j        di |}t          	                    |          }|j
        S )Nr   i  )IndexId	QueryTextPageSizeAttributeFilter'DocumentRelevanceOverrideConfigurationsUserContextr8   )r   stripr   r   r   r   r   retriever   	parse_objr   r   r   )rK   r   kendra_kwargsresponser_resultq_results         r   _kendra_queryz#AmazonKendraRetriever._kendra_query  s    } qu-

 
  ,/3/DM+,:F? CD (+/+<M-('4;'88-88!++H55 	('' %4;$55}55((22##r    result_itemsc                 >      fd|d  j                  D             }|S )Nc                 D    g | ]}|                     j                  S r8   )rx   rn   rf   r!   rK   s     r   
<listcomp>z9AmazonKendraRetriever._get_top_k_docs.<locals>.<listcomp>  s8     
 
 
 KK344
 
 
r    )r   )rK   r   top_docss   `  r   _get_top_k_docsz%AmazonKendraRetriever._get_top_k_docs  s=    
 
 
 
$\tz\2
 
 
 r    docsc                 6      j         s|S  fd|D             }|S )zr
        Filter out the records that have a score confidence
        greater than the required threshold.
        c                     g | ]m}|j                             d           Qt          |j         d          t                    r1t                              |j         d          d          j        k    k|nS )rt   Nr   )rv   r   
isinstancer7   KENDRA_CONFIDENCE_MAPPINGr   r   s     r   r   zEAmazonKendraRetriever._filter_by_score_confidence.<locals>.<listcomp>  s     	
 	
 	
!!'**6t}W5s;; 7-11$-2H#NN,- - 
- - -r    )r   )rK   r   filtered_docss   `  r   _filter_by_score_confidencez1AmazonKendraRetriever._filter_by_score_confidence  sE    
 ( 	K	
 	
 	
 	
	
 	
 	
 r    run_managerc                    |                      |          }|                     |          }|                     |          S )zRun search on Kendra index and get top k documents

        Example:
        .. code-block:: python

            docs = retriever.invoke('This is my query')

        )r   r   r   )rK   r   r   r   
top_k_docss        r   _get_relevant_documentsz-AmazonKendraRetriever._get_relevant_documents  s>     ))%00)),77
//
;;;r    )$r0   r1   r2   r3   r7   r5   r   r
   r   r   r4   r   r   r   r   r)   rn   r   r"   r   r   r   floatr   r   r   r   classmethodr   r   r   r   r   r   r   r   r8   r    r   r   r   F  s*        ' 'R MMM!%K#%%%.2hsm222E3NNN'+htn+++GK.d0DKKK:GHj\3%67GGGKKK#'L(4.'''#HUOUUcc5J5J5J$JKKKKYw3 3    
 _(###"4S> "c " " " [ $#"H$3 $8J+? $ $ $ $6HZ,@ T(^    X 4>    &<< 4	<
 
h< < < < < <r    r   ),r   abcr   r   typingr   r   r   r   r	   r
   r   r   langchain_core.callbacksr   langchain_core.documentsr   langchain_core.retrieversr   pydanticr   r   r   r   typing_extensionsr   r7   r   r)   r4   rV   r+   r<   r@   rC   rN   rX   r"   r{   r   r   r   r   r   r8   r    r   <module>r      s   				 # # # # # # # #	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 D C C C C C - - - - - - 3 3 3 3 3 3            ( ' ' ' ' '3 3          ( #3T#Y#<= 
= 
= 
= 
= 
=	 
= 
= 
= 
=    '    * * * * *W* * * *7 7 7 7 7	 7 7 7 7    Yg    >& & & & &	 & & & &>F >F >F >F >FCw >F >F >F >FB.# .# .# .# .#j .# .# .#b" " " " " " " " 
 
 
 
 
)7 
 
 
 

 
 
 
 
Yg 
 
 
 
   f< f< f< f< f<M f< f< f< f< f<r    