
    Ng`                    `    d dl mZ d dlmZmZmZmZmZ er
d dlm	Z	m
Z
mZ  G d d          ZdS )    )annotations)TYPE_CHECKINGAnyIterableListOptional)	DataFrameRowSparkSessionc                      e Zd ZdZ	 	 	 	 	 	 d-d.dZe	 d/d0d            Zd1dZd1dZd2dZ	d/d3dZ
d2dZd4d"Zd5d&Zd6d7d*Zd/d3d+Zd6d7d,ZdS )8SparkSQLz;SparkSQL is a utility class for interacting with Spark SQL.N   spark_sessionOptional[SparkSession]catalogOptional[str]schemaignore_tablesOptional[List[str]]include_tablessample_rows_in_table_infointc                X   	 ddl m} n# t          $ r t          d          w xY w|r|n|j                                        | _        || j        j                            |           || j        j                            |           t          | 
                                          | _        |rt          |          nt                      | _        | j        r$| j        | j        z
  }|rt          d| d          |rt          |          nt                      | _        | j        r$| j        | j        z
  }|rt          d| d          |                                 }	|	rt          |	          n| j        | _        t#          |t$                    st'          d          || _        dS )	a  Initialize a SparkSQL object.

        Args:
            spark_session: A SparkSession object.
              If not provided, one will be created.
            catalog: The catalog to use.
              If not provided, the default catalog will be used.
            schema: The schema to use.
              If not provided, the default schema will be used.
            ignore_tables: A list of tables to ignore.
              If not provided, all tables will be used.
            include_tables: A list of tables to include.
              If not provided, all tables will be used.
            sample_rows_in_table_info: The number of rows to include in the table info.
              Defaults to 3.
        r   r   Fpyspark is not installed. Please install it with `pip install pyspark`Nzinclude_tables  not found in databasezignore_tables z,sample_rows_in_table_info must be an integer)pyspark.sqlr   ImportErrorbuildergetOrCreate_sparkr   setCurrentCatalogsetCurrentDatabaseset_get_all_table_names_all_tables_include_tables
ValueError_ignore_tablesget_usable_table_names_usable_tables
isinstancer   	TypeError_sample_rows_in_table_info)
selfr   r   r   r   r   r   r   missing_tablesusable_tabless
             c/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/utilities/spark_sql.py__init__zSparkSQL.__init__   s   2	0000000 	 	 	X  	 +RMM0D0P0P0R0R 	 K11':::K226:::t88::;;6DOs>222#%% 	!1D4DDN  LnLLL   5BLc-000suu 	!043CCN  K^KKK   33554AWc-000tGW3S99 	LJKKK*C'''   	 #database_uristrengine_argsOptional[dict]kwargsr   returnc                    	 ddl m} n# t          $ r t          d          w xY w|j                            |                                          } | |fi |S )zzCreating a remote Spark Session via Spark connect.
        For example: SparkSQL.from_uri("sc://localhost:15002")
        r   r   r   )r   r   r   r   remoter    )clsr5   r7   r9   r   sparks         r2   from_urizSparkSQL.from_uriK   s    	0000000 	 	 	X  	
 $++L99EEGGs5##F###r4   Iterable[str]c                V    | j         r| j         S t          | j        | j        z
            S )zGet names of tables available.)r'   sortedr&   r)   )r/   s    r2   r*   zSparkSQL.get_usable_table_names\   s/     	(''d&)<<===    c                    | j                             d                              d                                          }t	          t          d |                    S )NzSHOW TABLES	tableNamec                    | j         S N)rE   )rows    r2   <lambda>z/SparkSQL._get_all_table_names.<locals>.<lambda>e   s    CM rC   )r!   sqlselectcollectlistmap)r/   rowss     r2   r%   zSparkSQL._get_all_table_namesc   sK    {}--44[AAIIKKC11488999rC   tablec                    | j                             d|                                           d         j        }|                    d          }|d |         dz   S )NzSHOW CREATE TABLE r   USING;)r!   rJ   rL   createtab_stmtfind)r/   rP   	statementusing_clause_indexs       r2   _get_create_table_stmtzSparkSQL._get_create_table_stmtg   s`    KOO88899AACCAFU 	 '^^G44,,,-33rC   table_namesc                   |                                  }|9t          |                              |          }|rt          d| d          |}g }|D ]Y}|                     |          }| j        r&|dz  }|d|                     |           dz  }|dz  }|                    |           Zd                    |          }|S )Nztable_names r   z

/*
z*/z

)	r*   r$   
differencer(   rX   r.   _get_sample_spark_rowsappendjoin)r/   rY   all_table_namesr0   tables
table_name
table_info	final_strs           r2   get_table_infozSparkSQL.get_table_infoo   s    5577" --88IIN X !V!V!V!VWWW)O) 	& 	&J44Z@@J. #h&
N4#>#>z#J#JNNNN
d"
MM*%%%%KK''	rC   c                z   d| d| j          }| j                            |          }d                    t	          t          d |j        j                                      }	 |                     |          }d                    d |D                       }n# t          $ r d}Y nw xY w| j          d| d	| d| S )
NzSELECT * FROM z LIMIT 	c                    | j         S rG   )name)fs    r2   rI   z1SparkSQL._get_sample_spark_rows.<locals>.<lambda>   s    16 rC   r[   c                8    g | ]}d                      |          S )rg   )r_   ).0rH   s     r2   
<listcomp>z3SparkSQL._get_sample_spark_rows.<locals>.<listcomp>   s"    (O(O(OC3(O(O(OrC    z rows from z table:
)
r.   r!   rJ   r_   rM   rN   r   fields_get_dataframe_results	Exception)r/   rP   querydfcolumns_strsample_rowssample_rows_strs          r2   r]   zSparkSQL._get_sample_spark_rows   s    PPPt/NPP[__U##iiS)9)929;K%L%L M MNN	!55b99K"ii(O(O;(O(O(OPPOO 	! 	! 	! OOO	! . ! !5 ! !! !! !	
s   $4B B('B(rH   r
   tuplec                    t          t          t          |                                                                                    S rG   )rw   rN   r6   asDictvalues)r/   rH   s     r2   _convert_row_as_tuplezSparkSQL._convert_row_as_tuple   s.    Scjjll113344555rC   rs   r	   rM   c                j    t          t          | j        |                                                    S rG   )rM   rN   r{   rL   )r/   rs   s     r2   rp   zSparkSQL._get_dataframe_results   s%    C2BJJLLAABBBrC   allcommandfetchc                    | j                             |          }|dk    r|                    d          }t          |                     |                    S )None   )r!   rJ   limitr6   rp   )r/   r~   r   rs   s       r2   runzSparkSQL.run   sI    [__W%%E>>!B4..r22333rC   c                h    	 |                      |          S # t          $ r}	 d| cY d}~S d}~ww xY w)af  Get information about specified tables.

        Follows best practices as specified in: Rajkumar et al, 2022
        (https://arxiv.org/abs/2204.00498)

        If `sample_rows_in_table_info`, the specified number of sample rows will be
        appended to each table description. This can increase performance as
        demonstrated in the paper.
        Error: N)re   r(   )r/   rY   es      r2   get_table_info_no_throwz SparkSQL.get_table_info_no_throw   sX    	!&&{333 	! 	! 	!* Q==      	!s    
1,11c                j    	 |                      ||          S # t          $ r}	 d| cY d}~S d}~ww xY w)a*  Execute a SQL command and return a string representing the results.

        If the statement returns rows, a string of the results is returned.
        If the statement returns no rows, an empty string is returned.

        If the statement throws an error, the error message is returned.
        r   N)r   rq   )r/   r~   r   r   s       r2   run_no_throwzSparkSQL.run_no_throw   sX    	!88GU+++ 	! 	! 	!* Q==      	!s    
2-22)NNNNNr   )r   r   r   r   r   r   r   r   r   r   r   r   rG   )r5   r6   r7   r8   r9   r   r:   r   )r:   r@   )rP   r6   r:   r6   )rY   r   r:   r6   )rH   r
   r:   rw   )rs   r	   r:   rM   )r}   )r~   r6   r   r6   r:   r6   )__name__
__module____qualname____doc__r3   classmethodr?   r*   r%   rX   re   r]   r{   rp   r   r   r    rC   r2   r   r   	   sU       EE 15!% $-1.2)*=D =D =D =D =D~ >B$ $ $ $ [$ > > > >: : : :4 4 4 4    $
 
 
 
"6 6 6 6C C C C4 4 4 4 4! ! ! ! ! ! ! ! ! ! ! !rC   r   N)
__future__r   typingr   r   r   r   r   r   r	   r
   r   r   r   rC   r2   <module>r      s    " " " " " " ? ? ? ? ? ? ? ? ? ? ? ? ? ? 98888888888q! q! q! q! q! q! q! q! q! q!rC   