import json
import logging
import os
import tempfile
import zipfile
from pathlib import Path
from typing import Iterator, List, Union

from langchain_core.chat_loaders import BaseChatLoader
from langchain_core.chat_sessions import ChatSession
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage

logger = logging.getLogger(__name__)


class TelegramChatLoader(BaseChatLoader):
    """Load `telegram` conversations to LangChain chat messages.

    To export, use the Telegram Desktop app from
    https://desktop.telegram.org/, select a conversation, click the three dots
    in the top right corner, and select "Export chat history". Then select
    "Machine-readable JSON" (preferred) to export. Note: the 'lite' versions of
    the desktop app (like "Telegram for MacOS") do not support exporting chat
    history.
    """

    def __init__(
        self,
        path: Union[str, Path],
    ):
        """Initialize the TelegramChatLoader.

        Args:
            path (Union[str, Path]): Path to the exported Telegram chat zip,
                 directory, json, or HTML file.
        """
        self.path = path if isinstance(path, str) else str(path)

    @staticmethod
    def _load_single_chat_session_html(file_path: str) -> ChatSession:
        """Load a single chat session from an HTML file.

        Args:
            file_path (str): Path to the HTML file.

        Returns:
            ChatSession: The loaded chat session.
        """
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            raise ImportError(
                "Please install the 'beautifulsoup4' package to load"
                " Telegram HTML files. You can do this by running"
                "'pip install beautifulsoup4' in your terminal."
            )
        with open(file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        results: List[Union[HumanMessage, AIMessage]] = []
        previous_sender = None
        for message in soup.select(".message.default"):
            timestamp = message.select_one(".pull_right.date.details")["title"]
            from_name_element = message.select_one(".from_name")
            if from_name_element is None and previous_sender is None:
                logger.debug("from_name not found in message")
                continue
            elif from_name_element is None:
                from_name = previous_sender
            else:
                from_name = from_name_element.text.strip()
            text = message.select_one(".text").text.strip()
            results.append(
                HumanMessage(
                    content=text,
                    additional_kwargs={
                        "sender": from_name,
                        "events": [{"message_time": timestamp}],
                    },
                )
            )
            previous_sender = from_name

        return ChatSession(messages=results)

    @staticmethod
    def _load_single_chat_session_json(file_path: str) -> ChatSession:
        """Load a single chat session from a JSON file.

        Args:
            file_path (str): Path to the JSON file.

        Returns:
            ChatSession: The loaded chat session.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)

        messages = data.get("messages", [])
        results: List[BaseMessage] = []
        for message in messages:
            text = message.get("text", "")
            timestamp = message.get("date", "")
            from_name = message.get("from", "")

            results.append(
                HumanMessage(
                    content=text,
                    additional_kwargs={
                        "sender": from_name,
                        "events": [{"message_time": timestamp}],
                    },
                )
            )

        return ChatSession(messages=results)

    @staticmethod
    def _iterate_files(path: str) -> Iterator[str]:
        """Iterate over files in a directory or zip file.

        Args:
            path (str): Path to the directory or zip file.

        Yields:
            str: Path to each file.
        """
        if os.path.isfile(path) and path.endswith((".html", ".json")):
            yield path
        elif os.path.isdir(path):
            for root, _, files in os.walk(path):
                for file in files:
                    if file.endswith((".html", ".json")):
                        yield os.path.join(root, file)
        elif zipfile.is_zipfile(path):
            with zipfile.ZipFile(path) as zip_file:
                for file in zip_file.namelist():
                    if file.endswith((".html", ".json")):
                        with tempfile.TemporaryDirectory() as temp_dir:
                            yield zip_file.extract(file, path=temp_dir)

    def lazy_load(self) -> Iterator[ChatSession]:
        """Lazy load the messages from the chat file and yield them
        in as chat sessions.

        Yields:
            ChatSession: The loaded chat session.
        """
        for file_path in self._iterate_files(self.path):
            if file_path.endswith(".html"):
                yield self._load_single_chat_session_html(file_path)
            elif file_path.endswith(".json"):
                yield self._load_single_chat_session_json(file_path)
