
    Ng<                    J   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ  ej        e          Z	  G d	 d
e          Z	  G d d          Z 	  G d de          Z! G d de          Z"dS )    )annotationsN)
HTMLParser)TYPE_CHECKINGAnyDictListOptionalTupleUnion)
BaseLoader)Document)TextSplitter)
Connectionc                  <     e Zd ZdZd fdZdd	ZddZddZ xZS )ParseOracleDocMetadatazParse Oracle doc metadata...returnNonec                    t                                                       |                                  d| _        i | _        d S )NF)super__init__resetmatchmetadata)self	__class__s    i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/oracleai.pyr   zParseOracleDocMetadata.__init__%   s8    


(*    tagstrattrsList[Tuple[str, Optional[str]]]c                |    |dk    r&d}|D ]\  }}|dk    r|}|dk    r|r
|| j         |<    d S |dk    r	d| _        d S d S )Nmeta namecontenttitleT)r   r   )r   r   r    entryr%   values         r   handle_starttagz&ParseOracleDocMetadata.handle_starttag+   sx    &==#%E$ 5 5e6>>!E9$$ 5/4e,5 5 G^^DJJJ ^r   datac                :    | j         r|| j        d<   d| _         d S d S )Nr'   F)r   r   )r   r+   s     r   handle_dataz"ParseOracleDocMetadata.handle_data7   s-    : 	%)DM'"DJJJ	 	r   Dict[str, Any]c                    | j         S N)r   )r   s    r   get_metadataz#ParseOracleDocMetadata.get_metadata<   s
    }r   )r   r   )r   r   r    r!   r   r   )r+   r   r   r   )r   r.   )	__name__
__module____qualname____doc__r   r*   r-   r1   __classcell__r   s   @r   r   r   "   s        &&+ + + + + +
 
 
 
   
       r   r   c                  D    e Zd ZdZeddd            Zedd            ZdS )OracleDocReaderzRead a fileNinput_stringUnion[str, None]r   r   c                   d}d}| )d                     t          j        dd                    } t          t	          j                              }t          j        d|          }t          j        | 	                                          
                                }|d |         }t          j        dt          j        d                    }||z   |z   }|                                }|                    |          }|d |         }|S )N       r$   >abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789   )kz>I)joinrandomchoicesinttimestructpackhashlibsha256encodedigestgetrandbitshexzfill)	r:   
out_lengthhash_len	timestamptimestamp_binhashval_bincounter_bin	object_idobject_id_hexs	            r   generate_object_idz"OracleDocReader.generate_object_idF   s   
77T   L 	$$	D)44 n\%8%8%:%:;;BBDD!)8), k$(:2(>(>?? "K/+=	!%++
 
 &kzk2r   connr   	file_pathparamsdictUnion[Document, None]c                   i }	 ddl }n"# t          $ r}t          d          |d}~ww xY w	 d|j        _        |                                 }t          |d          5 }|                                }ddd           n# 1 swxY w Y   |t          d|          S |                    |j	                  }	|                    |j	                  }
|
                    d|t          j        |          |	|
	           |                                 |	i }nt          |	                                          }|                    d
          s|                    d          r7t#                      }|                    |           |                                }t(                              | j        dz   |z             }||d<   ||d<   |
t          d|          S t          t          |
                                          |          S # t.          $ rY}t0                              d|            t0                              d|            |                                 Y d}~dS d}~ww xY w)zRead a file using OracleReader
        Args:
            conn: Oracle Connection,
            file_path: Oracle Directory,
            params: ONNX file name.
        Returns:
            Plain text and metadata as Langchain Document.
        r   NIUnable to import oracledb, please install with `pip install -U oracledb`.Frbr$   page_contentr   a  
                declare
                    input blob;
                begin
                    input := :blob;
                    :mdata := dbms_vector_chain.utl_to_text(input, json(:pref));
                    :text := dbms_vector_chain.utl_to_text(input);
                end;)blobprefmdatatext<!DOCTYPE html<HTML>$_oid_fileAn exception occurred :: zSkip processing )oracledbImportErrordefaults
fetch_lobscursoropenreadr   varDB_TYPE_CLOBexecutejsondumpscloser   getvalue
startswithr   feedr1   r9   rX   username	Exceptionloggerinfo)rY   rZ   r[   r   rm   erq   fr+   re   rf   doc_datapdoc_idexs                  r   	read_filezOracleDocReader.read_filei   s    $&	OOOO 	 	 	-  	
4	+0H([[]]Fi&&  !vvxx                              |R(CCCCJJx455E::h344DNN Z''     LLNNN}u~~//00&&'788 0H<O<O= = 0 /00AFF8$$$ ~~//H$778Ki8WXXF%HV )HW|R(CCCCS-A-AHUUUU 	 	 	KK8B88999KK6966777LLNNN44444		sY   	 
(#(0H A=1H =BH BH D<H /H 
I+AI&&I+r0   )r:   r;   r   r   )rY   r   rZ   r   r[   r\   r   r]   )r2   r3   r4   r5   staticmethodrX   r    r   r   r9   r9   C   sd                \ D H H H \H H Hr   r9   c                  ,     e Zd ZdZd fdZddZ xZS )OracleDocLoaderzwRead documents using OracleDocLoader
    Args:
        conn: Oracle Connection,
        params: Loader parameters.
    rY   r   r[   r.   kwargsr   c                    || _         t          j        t          j        |                    | _         t                      j        di | d S )Nr   )rY   rw   loadsrx   r[   r   r   )r   rY   r[   r   r   s       r   r   zOracleDocLoader.__init__   sH    	jF!3!344""6"""""r   r   List[Document]c           	        	 ddl }n"# t          $ r}t          d          |d}~ww xY wd}g }i }ddi}	 | j        | j                            d          | _        | j                            d          | _        | j                            d          | _        | j                            d	          | _        | j                            d
          | _        nt          d          d|j
        _        | j        r?t                              | j        | j        |          }||S |                    |           | j        rd}t!          j        | j                  D ]}	t           j                            | j        |	          }
t           j                            |
          r\t                              | j        |
|          }|$|dz   }t*                              d| d           |                    |           | j        r	 | j        | j        t          d          | j                                        }| j                            d          | _        | j        t3          | j                  dk    rt          d          d}|                    || j                                        | j                                                   |                                }|D ]*}|d         | j        v r|d         dvrt          d          +d| _        | j        | j        D ]}| j        dz   |z   | _        d| j        z   dz   t=          j        |          z   dz   | j        z   dz   | j        z   dz   | j        z   dz   | j        z   d z   }|                    |           |D ]}i }|ot                               | j        j!        d!z   | j        z   d!z   | j        z   d!z   | j        z             }||d"<   |                    tE          d#|$                     v|d         vtG          |d                   }|$                    d%          s|$                    d&          r7tK                      }|&                    |           |'                                }t                               | j        j!        d!z   | j        z   d!z   | j        z   d!z   | j        z   d!z   tG          |d'                   z             }||d"<   |d'         |d(<   | j        t3          | j                  }tQ          d|          D ]}||d'z            || j        |         <   |d         &|                    tE          d#|$                     |                    tE          tG          |d                   |$                     nW# t          $ rJ}t*                              d)|            tS          j*                     |+                                  d}~ww xY w|S # t          $ r6}t*                              d)|            tS          j*                      d}~ww xY w)*z,Load data into LangChain Document objects...r   Nr_   	plaintextfalsefiledirowner	tablenamecolnamezMissing loader parametersF   zTotal skipped: 
z%Missing owner or column name or both.
mdata_cols   z?Exceeds the max number of columns you can request for metadata.zgselect column_name, data_type from all_tab_columns where owner = :ownername and table_name = :tablename)	ownernamer   )NUMBERBINARY_DOUBLEBINARY_FLOATLONGDATE	TIMESTAMPVARCHAR2zDThe datatype for the column requested for metadata is not supported.z, rowidz, z'select dbms_vector_chain.utl_to_text(t.z, json('z+')) mdata, dbms_vector_chain.utl_to_text(t.z) textz from .z tri   rj   r$   ra   rg   rh      _rowidrl   ),rm   rn   r[   getr   r   r   r   r   r~   ro   rp   r9   r   rY   appendoslistdirpathrB   isfiler   r   rq   r   lenrv   upperfetchallmdata_cols_sqlrw   rx   rX   r}   r   r   r{   r   r|   r1   range	traceback	print_excry   )r   rm   r   ncolsresultsr   m_paramsdoc
skip_count	file_namerZ   rq   sqlrowsrowcolr   r+   r   ir   s                        r   loadzOracleDocLoader.load   s.   	OOOO 	 	 	-  	 "$#%)g	{& KOOF33	;??511![__W55
!%!=!=#{y99 ;<<<+0H(y $%//	49hOO;"Ns###x 0
!#DH!5!5 	0 	0I "TXy A AIw~~i00 0-77	9hWW;)3aJ"KK(H*(H(H(HIIII#NN3///~ ~z)T\-A'(OPPP!Y--//F&*kool&C&CDO2t//!33"+!B# # 8 
 &*j&6&6&8&8&*n&:&:&<&< '     &00#' & &C"1v88#&q6 2" $" $" +4)K+& +& %&
 +4D'2#'? S SC262E2Ls2RD// B,'$% *X../ H	H
 ,' ## -. ## *	% 
 .)    NN3'''% 7" 7"#%;%4%G%G $	 2"%!&"&*!- #&!& #'.	!1
 #&!& #',!/& &F 06HV,#NN8h+W+W+WXXXX"1v1'*3q6{{#'??3C#D#D !@$,I" I" !@ )?(@(@A$%FF4LLL/0~~/?/?H%4%G%G $	 2"%!&"&*!- #&!& #'.	!1
 #&!& #',!/ #&!& #&c!f++!.
& 
&F 06HV,14QHX.  $:(+DO(<(<%*1e__ J J?B1q5z); < <"1v~ '$,"x$P$P$P!" !" !" !" !($,58Q[[8%& %& %&!" !" !" !"g7"p !   KK @B @ @AAA')))LLNNN	 N 	 	 	KK8B88999!!!	sU    
&!&C.X #C"X N.V5 4X 5
X	?AXX		X 
Y1Y		Y)rY   r   r[   r.   r   r   )r   r   )r2   r3   r4   r5   r   r   r6   r7   s   @r   r   r      sc         # # # # # #
u u u u u u u ur   r   c                  ,     e Zd ZdZd fd
ZddZ xZS )OracleTextSplitterz$Splitting text using Oracle chunker.rY   r   r[   r.   r   r   r   r   c                   || _         || _         t                      j        di | 	 ddl}	 ddl}n"# t          $ r}t          d          |d}~ww xY w|| _        || _        dS # t          $ r t          d          w xY w)zInitialize.r   Nr_   zjoracledb or json or both are not installed. Please install them. Recommendations: `pip install oracledb`. r   )	rY   r[   r   r   rw   rm   rn   	_oracledb_json)r   rY   r[   r   rw   rm   r   r   s          r   r   zOracleTextSplitter.__init__  s    	""6"""	KKK   !1   &DNDJJJ 	 	 	>  	s*   A& 4 A& 
AAAA& &B rf   r   	List[str]c                ~   	 ddl }n"# t          $ r}t          d          |d}~ww xY wg }	 d| j        j        _        | j                                        }|                    |j                   |	                    d|| j
                            | j                             	 |                                }|n<| j
                            |d                   }|                    |d	                    S|S # t           $ r6}t"                              d
|            t'          j                      d}~ww xY w)z&Split incoming text and return chunks.r   Nr_   F)r&   zUselect t.column_value from dbms_vector_chain.utl_to_chunks(:content, json(:params)) t)r&   r[   T
chunk_datarl   )rm   rn   r   ro   rp   rY   rq   setinputsizesCLOBrv   r   rx   r[   fetchoner   r   r~   r   r   r   r   )	r   rf   rm   r   splitsrq   r   dr   s	            r   
split_textzOracleTextSplitter.split_text  sn   	OOOO 	 	 	-  	 	16DN#.Y%%''F   777NNOz''44	    /oo'';J$$SV,,ao.../ M 	 	 	KK8B88999!!!	s(    
&!&CC< <
D<1D77D<)rY   r   r[   r.   r   r   r   r   )rf   r   r   r   )r2   r3   r4   r5   r   r   r6   r7   s   @r   r   r   |  sW        ..     2' ' ' ' ' ' ' 'r   r   )#
__future__r   rI   rw   loggingr   rC   rG   rF   r   html.parserr   typingr   r   r   r   r	   r
   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_text_splittersr   rm   r   	getLoggerr2   r   r   r9   r   r   r   r   r   <module>r      s   # " " " " "    				        " " " " " " I I I I I I I I I I I I I I I I I I 6 6 6 6 6 6 - - - - - - 1 1 1 1 1 1 $######		8	$	$ "    Z   < o o o o o o o od A A A A Aj A A AHC C C C C C C C C Cr   