
    Ngw#                        d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ er
d dlZd dlmZ ddZ G d de          ZddZ G d de          ZeZdS )    )annotationsN)Path)TYPE_CHECKINGDictListOptionalUnionDocument)
BaseLoader)
EntityLikerowdictreturnstrc                H    | d         }| d         }| d         }| d| d| dS )zBCombine message information in a readable format ready to be used.datefromtextz on z: 

 )r   r   senderr   s       i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/telegram.pyconcatenate_rowsr      s<    v;D[Fv;D,,$,,$,,,,    c                  "    e Zd ZdZd	dZd
dZdS )TelegramChatFileLoaderzLoad from `Telegram chat` dump.pathUnion[str, Path]c                    || _         dS )zInitialize with a path.N)	file_path)selfr   s     r   __init__zTelegramChatFileLoader.__init__   s    r   r   List[Document]c                4   t          | j                  }t          |d          5 }t          j        |          }ddd           n# 1 swxY w Y   d                    d |d         D                       }dt          |          i}t          ||          gS )	Load documents.utf8encodingN c              3     K   | ]:}|d          dk    t          |d         t                    )t          |          V  ;dS )typemessager   N)
isinstancer   r   ).0r-   s     r   	<genexpr>z.TelegramChatFileLoader.load.<locals>.<genexpr>'   sX       
 
v)++
76?C0P0P+ W%%++++
 
r   messagessourcepage_contentmetadata)r   r!   openjsonloadjoinr   r   )r"   pfdr   r5   s         r   r8   zTelegramChatFileLoader.load    s      !f%%% 		!A	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ww 
 
Z=
 
 
 
 

 c!ff%dX>>>??s   AAAN)r   r   r   r$   )__name__
__module____qualname____doc__r#   r8   r   r   r   r   r      sH        ))   @ @ @ @ @ @r   r   r   Union[str, List[str]]r$   c                   ddl m}  |dg dd          }t          | t                    r| g} d | D             }t	          |          D ]\  }}|dz   |j        d	<   g }|D ]}|                    |j                  }t	          |          D ]^\  }}t          ||j        d	         |d
          }|j        d	          d|j        d          |j        d<   |	                    |           _|S )zIConvert a string or list of strings to a list of Documents with metadata.r   )RecursiveCharacterTextSplitteri   )r   
.!?, r*      )
chunk_size
separatorschunk_overlapc                .    g | ]}t          |           S ))r4   r
   )r/   pages     r   
<listcomp>z text_to_docs.<locals>.<listcomp>>   s#    >>>t,,,>>>r      rP   )rP   chunkr3   -rS   r2   )
langchain_text_splittersrD   r.   r   	enumerater5   
split_textr4   r   append)	r   rD   text_splitter	page_docsidoc
doc_chunkschunksrS   s	            r   text_to_docsr_   1   sV   GGGGGG22>>>  M $ v>>>>>I I&& % %3 1uV J # #))#*:;;!&)) 	# 	#HAu"cl66JUV-W-W  C ),V(<%V%Vs|G?T%V%VCL"c""""	# r   c                  F    e Zd ZdZ	 	 	 	 	 dddZddZddZddZddZdS ) TelegramChatApiLoaderz)Load `Telegram` chat json directory dump.Ntelegram_data.jsonchat_entityOptional[EntityLike]api_idOptional[int]api_hashOptional[str]usernamer!   r   c                L    || _         || _        || _        || _        || _        dS )aI  Initialize with API parameters.

        Args:
            chat_entity: The chat entity to fetch data from.
            api_id: The API ID.
            api_hash: The API hash.
            username: The username.
            file_path: The file path to save the data to. Defaults to
                 "telegram_data.json".
        N)rc   re   rg   ri   r!   )r"   rc   re   rg   ri   r!   s         r   r#   zTelegramChatApiLoader.__init__V   s,    $ '  "r   r   Nonec                4  K   ddl m} g } || j        | j        | j                  4 d{V }|                    | j                  2 3 d{V }|j        du}|r|j        j        nd}|	                    |j
        |j        |j                                        |j        ||d           c6 	 ddd          d{V  n# 1 d{V swxY w Y   t          | j        dd          5 }t#          j        ||dd	
           ddd           dS # 1 swxY w Y   dS )z8Fetch data from Telegram API and save it as a JSON file.r   )TelegramClientN)	sender_idr   r   
message.idis_replyreply_to_idwzutf-8r(   F   )ensure_asciiindent)telethon.syncrm   ri   re   rg   iter_messagesrc   reply_toreply_to_msg_idrX   rn   r   r   	isoformatidr6   r!   r7   dump)r"   rm   dataclientr-   rp   rq   r;   s           r   fetch_data_from_telegramz.TelegramChatApiLoader.fetch_data_from_telegramn   s(     000000!>$-dmLL 	 	 	 	 	 	 	PV!'!5!5d6F!G!G       g"+47BJTg.>>PT%,%6 ' ' 6 6 8 8&-j$,'2 	 	 	 	 "H!G	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 $.#888 	=AIdAE!<<<<	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	= 	=s0   B?B+AB??
C	C	'DDDr}   pd.DataFramer   c                    dfd||d                   }||d                                       d	g
          d	                             t                    d	<   fd|d         D             }|S )a
  Create a dictionary of message threads from the given data.

        Args:
            data (pd.DataFrame): A DataFrame containing the conversation                 data with columns:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            dict: A dictionary where the key is the parent message ID and                 the value is a list of message IDs in ascending order.
        	parent_idint
reply_datar   r   	List[int]c                    ||d         | k             d                                          }g }|D ]}||g ||          z   z  }|S )a^  
            Recursively find all replies to a given parent message ID.

            Args:
                parent_id (int): The parent message ID.
                reply_data (pd.DataFrame): A DataFrame containing reply messages.

            Returns:
                list: A list of message IDs that are replies to the parent message ID.
            rq   ro   )tolist)r   r   direct_repliesall_repliesreply_idfind_repliess        r   r   z@TelegramChatApiLoader._get_message_threads.<locals>.find_replies   sj     (
=(AY(NOfhh 
 K* O OzLL:,N,NNNr   rp   rq   )subsetc                2    i | ]}||g |          z   S r   r   )r/   r   r   reply_messagess     r   
<dictcomp>z>TelegramChatApiLoader._get_message_threads.<locals>.<dictcomp>   s>     
 
 
 	{\\)^%L%LL
 
 
r   ro   )r   r   r   r   r   r   )dropnaastyper   )r"   r}   parent_messagesmessage_threadsr   r   s       @@r   _get_message_threadsz*TelegramChatApiLoader._get_message_threads   s    $	 	 	 	 	 	0 Z 001 d:./66}o6NN )7}(E(L(LS(Q(Q}%
 
 
 
 
,\:
 
 

 r   r   Dict[int, List[int]]c                J   d}|                                 D ]y\  }}||d                             |                                       d          d                                         }d |D             }|d                    |          dz   z  }z|                                S )	aw  
        Combine the message texts for each parent message ID based             on the list of message threads.

        Args:
            message_threads (dict): A dictionary where the key is the parent message                 ID and the value is a list of message IDs in ascending order.
            data (pd.DataFrame): A DataFrame containing the conversation data:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            str: A combined string of message texts sorted by date.
        r*   ro   r   )byr   c                ,    g | ]}t          |          S r   )r   )r/   elems     r   rQ   z@TelegramChatApiLoader._combine_message_texts.<locals>.<listcomp>   s    AAA4SYYAAAr   rJ   z.
)itemsisinsort_valuesr   r9   strip)r"   r   r}   combined_textr   message_idsmessage_textss          r   _combine_message_textsz,TelegramChatApiLoader._combine_message_texts   s    *  '6&;&;&=&= 
	= 
	="I{ T,',,[99:''0 
 BA=AAAM SXXm44u<<MM""$$$r   r$   c                f   | j         ]	 ddl}|                                 t          j        |                                            n# t          $ r t          d          w xY wt          | j                  }t          |d          5 }t          j        |          }ddd           n# 1 swxY w Y   	 ddl}n# t          $ r t          d          w xY w |j        |          } |j        |          }|                     |          }|                     ||          }	t#          |	          S )r&   Nr   zy`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    r'   r(   zf`pandas` package not found. 
                please install with `pip install pandas`
                )rc   nest_asyncioapplyasynciorunr   ImportErrorr   r!   r6   r7   r8   pandasjson_normalize	DataFramer   r   r_   )
r"   r   r:   r;   r<   pdnormalized_messagesdfr   combined_textss
             r   r8   zTelegramChatApiLoader.load   s    '
####""$$$D99;;<<<<   !     !f%%% 		!A	 	 	 	 	 	 	 	 	 	 	 	 	 	 		 	 	 	  	 0b/22R\-..33B7744_bIIN+++s)   >A A"
B++B/2B/7B< <C)NNNNrb   )
rc   rd   re   rf   rg   rh   ri   rh   r!   r   )r   rk   )r}   r   r   r   )r   r   r}   r   r   r   r=   )	r>   r?   r@   rA   r#   r   r   r   r8   r   r   r   ra   ra   S   s        33 -1 $"&"&-# # # # #0= = = =.9 9 9 9v$% $% $% $%L", ", ", ", ", ",r   ra   )r   r   r   r   )r   rB   r   r$   )
__future__r   r   r7   pathlibr   typingr   r   r   r   r	   langchain_core.documentsr   )langchain_community.document_loaders.baser   r   r   telethon.hintsr   r   r   r_   ra   TelegramChatLoaderr   r   r   <module>r      sK   " " " " " "         = = = = = = = = = = = = = = - - - - - - @ @ @ @ @ @ *))))))- - - -@ @ @ @ @Z @ @ @0   Du, u, u, u, u,J u, u, u,r ,   r   