
    Ngi                    ~    d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ  G d de          ZdS )    )annotationsN)AnyDictIteratorListOptionalTuple)Document)
BaseLoaderc                  P    e Zd ZdZ	 	 dddZddZddZddZddZd dZ	d!dZ
dS )"AthenaLoaderaY  Load documents from `AWS Athena`.

    Each document represents one row of the result.
    - By default, all columns are written into the `page_content` of the document
    and none into the `metadata` of the document.
    - If `metadata_columns` are provided then these columns are written
    into the `metadata` of the document while the rest of the columns
    are written into the `page_content` of the document.

    To authenticate, the AWS client uses this method to automatically load credentials:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

    If a specific credential profile should be used, you must pass
    the name of the profile from the ~/.aws/credentials file that is to be used.

    Make sure the credentials / roles used have the required policies to
    access the Amazon Textract service.
    Nquerystrdatabases3_output_uriprofile_nameOptional[str]metadata_columnsOptional[List[str]]c                   || _         || _        || _        ||ng | _        	 ddl}n# t
          $ r t          d          w xY w	 ||                    |          n|                                }n"# t          $ r}t          d          |d}~ww xY w|	                    d          | _
        |	                    d          | _        dS )ag  Initialize Athena document loader.

        Args:
            query: The query to run in Athena.
            database: Athena database.
            s3_output_uri: Athena output path.
            profile_name: Optional. AWS credential profile, if profiles are being used.
            metadata_columns: Optional. Columns written to Document `metadata`.
        Nr   zRCould not import boto3 python package. Please install it with `pip install boto3`.)r   zCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.athenas3)r   r   r   r   boto3ImportErrorSession	Exception
ValueErrorclientathena_client	s3_client)	selfr   r   r   r   r   r   sessiones	            g/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/athena.py__init__zAthenaLoader.__init__!   s   " 
 *4D4P 0 0VX	LLLL 	 	 	>  		  + <888]]__ G
  	 	 	*  		 %^^H55 --s!   ' A,A2 2
B<BBreturnList[Dict[str, Any]]c                   | j                             | j        d| j        id| j        i          }|d         }	 | j                             |          }|d         d         d	         }|d
k    rnZ|dk    r*|d         d         }|d         }d| }t          |          |dk    rt          d          t          j        d           | 	                    |          }t          j        |                    d                    S )NDatabaseOutputLocation)QueryStringQueryExecutionContextResultConfigurationQueryExecutionIdT)r.   QueryExecutionStatusState	SUCCEEDEDFAILEDStateChangeReasonzQuery Failed: 	CANCELLEDz Query was cancelled by the user.   records)orient)r   start_query_executionr   r   r   get_query_executionr   timesleep_get_result_setjsonloadsto_json)r!   responsequery_execution_idstateresp_statusstate_change_reasonerr
result_sets           r$   _execute_queryzAthenaLoader._execute_queryO   s5   %;;
#-t}"=!143E F < 
 

 &&89	)==!3 >  H -.x8AE##(""&'78B&12E&F#<':<<nn$+%% BCCCJqMMM	  ))*<==
z*,,I,>>???    input_stringsuffixc                d    |r-|                     |          r|d t          |                    S |S N)endswithlenr!   rJ   rK   s      r$   _remove_suffixzAthenaLoader._remove_suffixi   s<     	0l++F33 	03v;;,//rI   c                b    |r,|                     |          r|t          |          d          S |S rM   )
startswithrO   rP   s      r$   _remove_prefixzAthenaLoader._remove_prefixn   s:     	/l--f55 	/F..rI   rB   r   c                   	 dd l }n# t          $ r t          d          w xY w| j        }|                     |                     |d          d                              d          }|d         }d                    |dd          |gz             dz   }| j                            ||          }|	                    t          j        |d                                                   d	
          }|S )Nr   zTCould not import pandas python package. Please install it with `pip install pandas`./zs3://r6   z.csv)BucketKeyBodyutf8)encoding)pandasr   r   rT   rQ   splitjoinr    
get_objectread_csvioBytesIOread)	r!   rB   pd
output_uritokensbucketkeyobjdfs	            r$   r=   zAthenaLoader._get_result_sets   s
   	 	 	 	?  	 '
$$
C00'
 

%** 	 hhvabbz%7$8899FBn''v3'??[[CK$4$4$6$677&[II	s    !query_resultTuple[List[str], List[str]]c                    g }g }t          |d                                                   }|D ]6}|| j        v r|                    |           !|                    |           7||fS )Nr   )listkeysr   append)r!   rk   content_columnsr   all_columnsrh   s         r$   _get_columnszAthenaLoader._get_columns   s     <?//1122 	, 	,Cd+++ '',,,,&&s++++ 000rI   Iterator[Document]c              #  @  K   |                                  }|                     |          \  |D ]j}d                    fd|                                D                       }fd|                                D             }t	          ||          }|V  kd S )N
c              3  4   K   | ]\  }}|v 	| d | V  dS )z: N ).0kvrq   s      r$   	<genexpr>z)AthenaLoader.lazy_load.<locals>.<genexpr>   sD       % % $1qO7K7K17K7K7K7K% %rI   c                (    i | ]\  }}|v 	|||S rM   rx   )ry   rz   r{   r   s      r$   
<dictcomp>z*AthenaLoader.lazy_load.<locals>.<dictcomp>   s2       A5E0E0E!-1---rI   )page_contentmetadata)rH   rs   r^   itemsr
   )r!   rk   rowr   r   docrq   r   s         @@r$   	lazy_loadzAthenaLoader.lazy_load   s      **,,,0,=,=l,K,K)) 	 	C99 % % % %(+		% % %  L   !$  H xHHHCIIII	 	rI   )NN)
r   r   r   r   r   r   r   r   r   r   )r&   r'   )rJ   r   rK   r   r&   r   )rB   r   r&   r   )rk   r'   r&   rl   )r&   rt   )__name__
__module____qualname____doc__r%   rH   rQ   rT   r=   rs   r   rx   rI   r$   r   r      s         0 '+04,. ,. ,. ,. ,.\@ @ @ @4   
   
   (1 1 1 1     rI   r   )
__future__r   ra   r>   r;   typingr   r   r   r   r   r	   langchain_core.documentsr
   )langchain_community.document_loaders.baser   r   rx   rI   r$   <module>r      s    " " " " " " 				   = = = = = = = = = = = = = = = = - - - - - - @ @ @ @ @ @S S S S S: S S S S SrI   