
    Ngf                        d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ  G d	 d
e          ZdS )    )annotationsN)Path)AnyDictIterableListOptional)CallbackManagerForRetrieverRunDocument)BaseRetriever)
ConfigDictc                      e Zd ZU dZdZded<   	 ded<   	 dZded<   	 dZd	ed
<   	  ed          Z	e
	 	 d)d*d            Ze
ddd+d            Zd,dZ	 d-d.d#Ze
d$dd%d/d(            ZdS )0TFIDFRetrieverz`TF-IDF` retriever.

    Largely based on
    https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
    Nr   
vectorizerList[Document]docstfidf_array   intkT)arbitrary_types_allowedtextsIterable[str]	metadatasOptional[Iterable[dict]]tfidf_paramsOptional[Dict[str, Any]]kwargsreturnc                    	 ddl m} n# t          $ r t          d          w xY w|pi } |di |}|                    |          }|pd |D             }d t	          ||          D             } | d|||d|S )Nr   )TfidfVectorizerzNCould not import scikit-learn, please install with `pip install scikit-learn`.c              3     K   | ]}i V  d S N ).0_s     `/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/retrievers/tfidf.py	<genexpr>z,TFIDFRetriever.from_texts.<locals>.<genexpr>4   s"      !4!4"!4!4!4!4!4!4    c                6    g | ]\  }}t          ||           S )page_contentmetadatar   )r&   tms      r(   
<listcomp>z-TFIDFRetriever.from_texts.<locals>.<listcomp>5   s)    WWWAa!444WWWr*   r   r   r   r%   )sklearn.feature_extraction.textr"   ImportErrorfit_transformzip)	clsr   r   r   r   r"   r   r   r   s	            r(   
from_textszTFIDFRetriever.from_texts!   s    	GGGGGGG 	 	 	!  	 $)r$_44|44
 ..u554!4!4e!4!4!4	WWUIAVAVWWWsWjtWWPVWWWs   	 #)r   	documentsIterable[Document]c               P    t          d |D              \  }} | j        d|||d|S )Nc              3  2   K   | ]}|j         |j        fV  d S r$   r,   )r&   ds     r(   r)   z0TFIDFRetriever.from_documents.<locals>.<genexpr>@   s+       Q Q!!.!*!= Q Q Q Q Q Qr*   )r   r   r   r%   )r6   r8   )r7   r9   r   r   r   r   s         r(   from_documentszTFIDFRetriever.from_documents8   sQ      Q Qy Q Q QRys~ 
li
 
KQ
 
 	
r*   querystrrun_managerr
   c                    ddl m}  j                            |g          } | j        |                              d          } fd|                                 j         d          d d d         D             }|S )Nr   )cosine_similarity)c                *    g | ]}j         |         S r%   )r   )r&   iselfs     r(   r1   z:TFIDFRetriever._get_relevant_documents.<locals>.<listcomp>P   s    PPPty|PPPr*   rD   )sklearn.metrics.pairwiserC   r   	transformr   reshapeargsortr   )rG   r?   rA   rC   	query_vecresultsreturn_docss   `      r(   _get_relevant_documentsz&TFIDFRetriever._get_relevant_documentsE   s     	?>>>>>O--G
 
	 $#D$4i@@HH
 
 QPPPW__->->wyy-I$$B$-OPPPr*   tfidf_vectorizerfolder_path	file_nameNonec                z   	 dd l }n# t          $ r t          d          w xY wt          |          }|                    dd           |                    | j        || dz             t          || dz  d          5 }t          j        | j        | j	        f|           d d d            d S # 1 swxY w Y   d S )Nr   BCould not import joblib, please install with `pip install joblib`.T)exist_okparents.joblib.pklwb)
joblibr4   r   mkdirdumpr   openpickler   r   )rG   rQ   rR   r[   pathfs         r(   
save_localzTFIDFRetriever.save_localS   s*   
	MMMM 	 	 	T  	
 K  

D$
/// 	DOTy,A,A,A%ABBB $I++++T22 	:aKD$45q999	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	:s    !"B00B47B4F)allow_dangerous_deserializationrR   rc   boolc               d   	 ddl }n# t          $ r t          d          w xY w|st          d          t          |          }|                    || dz            }t          || dz  d          5 }t          j        |          \  }}	ddd           n# 1 swxY w Y    | |||	          S )	a  Load the retriever from local storage.

        Args:
            folder_path: Folder path to load from.
            allow_dangerous_deserialization: Whether to allow dangerous deserialization.
                Defaults to False.
                The deserialization relies on .joblib and .pkl files, which can be
                modified to deliver a malicious payload that results in execution of
                arbitrary code on your machine. You will need to set this to `True` to
                use deserialization. If you do this, make sure you trust the source of
                the file.
            file_name: File name to load from. Defaults to "tfidf_vectorizer".

        Returns:
            TFIDFRetriever: Loaded retriever.
        r   NrU   a  The de-serialization of this retriever is based on .joblib and .pkl files.Such files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to load this retriever. If you do this, make sure you trust the source of the file, and you are responsible for validating the file came from a trusted source.rX   rY   rbr2   )r[   r4   
ValueErrorr   loadr^   r_   )
r7   rQ   rc   rR   r[   r`   r   ra   r   r   s
             r(   
load_localzTFIDFRetriever.load_locali   s2   0	MMMM 	 	 	T  	
 / 
	.	 	 	 K   [[9(=(=(=!=>>
 $I++++T22 	/a !'AD+	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/
 sjtMMMMs    !5BB B)NN)
r   r   r   r   r   r   r   r   r    r   )r9   r:   r   r   r   r   r    r   )r?   r@   rA   r
   r    r   )rP   )rQ   r@   rR   r@   r    rS   )rQ   r@   rc   rd   rR   r@   r    r   )__name__
__module____qualname____doc__r   __annotations__r   r   r   model_configclassmethodr8   r>   rO   rb   ri   r%   r*   r(   r   r      sO          JKAJJJJ(: $  L  /315	X X X X [X, 
 26	

 

 

 

 

 [

   " ,: : : : :, 
 16+5N 5N 5N 5N 5N [5N 5N 5Nr*   r   )
__future__r   r_   pathlibr   typingr   r   r   r   r	   langchain_core.callbacksr
   langchain_core.documentsr   langchain_core.retrieversr   pydanticr   r   r%   r*   r(   <module>rx      s    " " " " " "        6 6 6 6 6 6 6 6 6 6 6 6 6 6 C C C C C C - - - - - - 3 3 3 3 3 3      RN RN RN RN RN] RN RN RN RN RNr*   