#!/usr/bin/env python3

from __future__ import annotations

import concurrent.futures
import json
import logging
import os
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import pandas as pd
from tqdm import tqdm

from unstructured.metrics.element_type import (
    calculate_element_type_percent_match,
    get_element_type_frequency,
)
from unstructured.metrics.object_detection import (
    ObjectDetectionEvalProcessor,
)
from unstructured.metrics.table.table_eval import TableEvalProcessor
from unstructured.metrics.text_extraction import calculate_accuracy, calculate_percent_missing_text
from unstructured.metrics.utils import (
    _count,
    _display,
    _format_grouping_output,
    _mean,
    _prepare_output_cct,
    _pstdev,
    _read_text_file,
    _rename_aggregated_columns,
    _stdev,
    _write_to_file,
)

logger = logging.getLogger("unstructured.eval")
handler = logging.StreamHandler()
handler.name = "eval_log_handler"
formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
handler.setFormatter(formatter)

# Only want to add the handler once
if "eval_log_handler" not in [h.name for h in logger.handlers]:
    logger.addHandler(handler)

logger.setLevel(logging.DEBUG)

AGG_HEADERS = ["metric", "average", "sample_sd", "population_sd", "count"]
AGG_HEADERS_MAPPING = {
    "index": "metric",
    "_mean": "average",
    "_stdev": "sample_sd",
    "_pstdev": "population_sd",
    "_count": "count",
}
OUTPUT_TYPE_OPTIONS = ["json", "txt"]


@dataclass
class BaseMetricsCalculator(ABC):
    """Foundation class for specialized metrics calculators.

    It provides a common interface for calculating metrics based on outputs and ground truths.
    These can be provided either as directories or as lists of files (see `on_files`).
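
    Example usage (an illustrative sketch; the directory names are placeholders and any
    concrete calculator defined below can be used):

        calc = TextExtractionMetricsCalculator(
            documents_dir="structured-output", ground_truths_dir="gold-standard"
        )
        calc.on_files(document_paths=["example.pdf.json"])  # optional narrowing of inputs
        per_doc_df = calc.calculate(export_dir="metrics")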
    """

    documents_dir: str | Path
    ground_truths_dir: str | Path

    def __post_init__(self):
        """Discover all files in the provided directories."""
        self.documents_dir = Path(self.documents_dir).resolve()
        self.ground_truths_dir = Path(self.ground_truths_dir).resolve()

        # -- auto-discover all files in the directories --
        self._document_paths = [
            path.relative_to(self.documents_dir)
            for path in self.documents_dir.glob("*")
            if path.is_file()
        ]
        self._ground_truth_paths = [
            path.relative_to(self.ground_truths_dir)
            for path in self.ground_truths_dir.glob("*")
            if path.is_file()
        ]

    @property
    @abstractmethod
    def default_tsv_name(self):
        """Default name for the per-document metrics TSV file."""

    @property
    @abstractmethod
    def default_agg_tsv_name(self):
        """Default name for the aggregated metrics TSV file."""

    @abstractmethod
    def _generate_dataframes(self, rows: list) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Generates pandas DataFrames from the list of rows.

        The first DF (index 0) is a dataframe containing metrics per file.
        The second DF (index 1) is a dataframe containing the aggregated
            metrics.
        """

    def on_files(
        self,
        document_paths: Optional[list[str | Path]] = None,
        ground_truth_paths: Optional[list[str | Path]] = None,
    ) -> BaseMetricsCalculator:
        """Overrides the default list of files to process."""
        if document_paths:
            self._document_paths = [Path(p) for p in document_paths]

        if ground_truth_paths:
            self._ground_truth_paths = [Path(p) for p in ground_truth_paths]

        return self

    def calculate(
        self,
        executor: Optional[concurrent.futures.Executor] = None,
        export_dir: Optional[str | Path] = None,
        visualize_progress: bool = True,
        display_agg_df: bool = True,
    ) -> pd.DataFrame:
        """Calculates metrics for each document using the provided executor.

        * Optionally, the results can be exported and displayed.
        * It loops through the structured outputs from `documents_dir` (or the files
        selected via `document_paths`) and compares each with the gold standard of the
        same name under `ground_truths_dir` (or the files selected via `ground_truth_paths`).

        Args:
            executor: concurrent.futures.Executor instance
            export_dir: directory to export the results
            visualize_progress: whether to display progress bar
            display_agg_df: whether to display the aggregated results

        Returns:
            Metrics for each document as a pandas DataFrame
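
        Example (an illustrative sketch; `calc` stands for any concrete calculator
        instance and the paths are placeholders):

            pool = concurrent.futures.ProcessPoolExecutor(max_workers=2)
            # the executor is entered and shut down internally while documents are processed
            per_doc_df = calc.calculate(executor=pool, export_dir="metrics")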
        """
        if executor is None:
            executor = self._default_executor()
        rows = self._process_all_documents(executor, visualize_progress)
        df, agg_df = self._generate_dataframes(rows)

        if export_dir is not None:
            _write_to_file(export_dir, self.default_tsv_name, df)
            _write_to_file(export_dir, self.default_agg_tsv_name, agg_df)

        if display_agg_df is True:
            _display(agg_df)
        return df

    @classmethod
    def _default_executor(cls):
        max_processors = int(os.environ.get("MAX_PROCESSES", os.cpu_count()))
        logger.info(f"Configuring a pool of {max_processors} processors for parallel processing.")
        return cls._get_executor_class()(max_workers=max_processors)

    @classmethod
    def _get_executor_class(
        cls,
    ) -> type[concurrent.futures.ThreadPoolExecutor] | type[concurrent.futures.ProcessPoolExecutor]:
        return concurrent.futures.ProcessPoolExecutor

    def _process_all_documents(
        self, executor: concurrent.futures.Executor, visualize_progress: bool
    ) -> list:
        """Triggers processing of all documents using the provided executor.

        Failures are omitted from the returned result.
        """
        with executor:
            return [
                row
                for row in tqdm(
                    executor.map(self._try_process_document, self._document_paths),
                    total=len(self._document_paths),
                    leave=False,
                    disable=not visualize_progress,
                )
                if row is not None
            ]

    def _try_process_document(self, doc: Path) -> Optional[list]:
        """Safe wrapper around the document processing method."""
        logger.info(f"Processing {doc}")
        try:
            return self._process_document(doc)
        except Exception as e:
            logger.error(f"Failed to process document {doc}: {e}")
            return None

    @abstractmethod
    def _process_document(self, doc: Path) -> Optional[list]:
        """Should return all metadata and metrics for a single document."""


@dataclass
class TableStructureMetricsCalculator(BaseMetricsCalculator):
    """Calculates the following metrics for tables:
        - tables found accuracy
        - table-level accuracy
        - element in column index accuracy
        - element in row index accuracy
        - element's column content accuracy
        - element's row content accuracy
    It also calculates the aggregated accuracy.
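
    Example usage (an illustrative sketch; the directory names and the cutoff value are
    placeholders):

        calc = TableStructureMetricsCalculator(
            documents_dir="structured-output",
            ground_truths_dir="table-ground-truth",
            cutoff=0.8,
        )
        df = calc.calculate(export_dir="metrics")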
    """

    cutoff: Optional[float] = None
    weighted_average: bool = True
    include_false_positives: bool = True

    def __post_init__(self):
        super().__post_init__()

    @property
    def supported_metric_names(self):
        return [
            "total_tables",
            "table_level_acc",
            "table_detection_recall",
            "table_detection_precision",
            "table_detection_f1",
            "composite_structure_acc",
            "element_col_level_index_acc",
            "element_row_level_index_acc",
            "element_col_level_content_acc",
            "element_row_level_content_acc",
        ]

    @property
    def default_tsv_name(self):
        return "all-docs-table-structure-accuracy.tsv"

    @property
    def default_agg_tsv_name(self):
        return "aggregate-table-structure-accuracy.tsv"

    def _process_document(self, doc: Path) -> Optional[list]:
        doc_path = Path(doc)
        out_filename = doc_path.stem
        doctype = Path(out_filename).suffix[1:]
        src_gt_filename = out_filename + ".json"
        connector = doc_path.parts[-2] if len(doc_path.parts) > 1 else None

        # skip documents that have no corresponding ground truth file
        if Path(src_gt_filename) not in self._ground_truth_paths:
            return None

        prediction_file = self.documents_dir / doc
        if not prediction_file.exists():
            logger.warning(f"Prediction file {prediction_file} does not exist, skipping")
            return None

        ground_truth_file = self.ground_truths_dir / src_gt_filename
        if not ground_truth_file.exists():
            logger.warning(f"Ground truth file {ground_truth_file} does not exist, skipping")
            return None

        processor_from_text_as_html = TableEvalProcessor.from_json_files(
            prediction_file=prediction_file,
            ground_truth_file=ground_truth_file,
            cutoff=self.cutoff,
            source_type="html",
        )
        report_from_html = processor_from_text_as_html.process_file()
        return [
            out_filename,
            doctype,
            connector,
            report_from_html.total_predicted_tables,
        ] + [getattr(report_from_html, metric) for metric in self.supported_metric_names]

    def _generate_dataframes(self, rows):
        headers = [
            "filename",
            "doctype",
            "connector",
            "total_predicted_tables",
        ] + self.supported_metric_names

        df = pd.DataFrame(rows, columns=headers)
        df["_table_weights"] = df["total_tables"]

        if self.include_false_positives:
            # give false-positive tables one table's worth of weight when computing
            # table-level accuracy
            df.loc[df.total_tables.eq(0) & df.total_predicted_tables.gt(0), "_table_weights"] = 1

        if not self.weighted_average:
            # assign weight 1 to every row with a nonzero weight
            df["_table_weights"] = df["_table_weights"].apply(
                lambda table_weight: 1 if table_weight != 0 else 0
            )

        # filter down to only those with actual and/or predicted tables
        has_tables_df = df[df["_table_weights"] > 0]

        if has_tables_df.empty:
            agg_df = pd.DataFrame(
                [[metric, None, None, None, 0] for metric in self.supported_metric_names],
                columns=AGG_HEADERS,
            )
        else:
            element_metrics_results = {}
            for metric in self.supported_metric_names:
                metric_df = has_tables_df[has_tables_df[metric].notnull()]
                agg_metric = metric_df[metric].agg([_stdev, _pstdev, _count]).transpose()
                if metric.startswith("total_tables"):
                    agg_metric["_mean"] = metric_df[metric].mean()
                elif metric.startswith("table_level_acc"):
                    agg_metric["_mean"] = np.round(
                        np.average(metric_df[metric], weights=metric_df["_table_weights"]),
                        3,
                    )
                else:
                    # false positive tables do not contribute to table structure and content
                    # extraction metrics
                    agg_metric["_mean"] = np.round(
                        np.average(metric_df[metric], weights=metric_df["total_tables"]),
                        3,
                    )
                if agg_metric.empty:
                    element_metrics_results[metric] = pd.Series(
                        data=[None, None, None, 0], index=["_mean", "_stdev", "_pstdev", "_count"]
                    )
                else:
                    element_metrics_results[metric] = agg_metric
            agg_df = pd.DataFrame(element_metrics_results).transpose().reset_index()
        agg_df = agg_df.rename(columns=AGG_HEADERS_MAPPING)
        return df, agg_df


@dataclass
class TextExtractionMetricsCalculator(BaseMetricsCalculator):
    """Calculates text accuracy and percent missing between document and ground truth texts.

    It also calculates the aggregated accuracy and percent missing.
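
    Example usage (an illustrative sketch; the directory names are placeholders):

        calc = TextExtractionMetricsCalculator(
            documents_dir="structured-output",
            ground_truths_dir="gold-standard",
            group_by="doctype",
            document_type="json",
        )
        df = calc.calculate(export_dir="metrics")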
    """

    group_by: Optional[str] = None
    weights: tuple[int, int, int] = (1, 1, 1)
    document_type: str = "json"

    def __post_init__(self):
        super().__post_init__()
        self._validate_inputs()

    @property
    def default_tsv_name(self) -> str:
        return "all-docs-cct.tsv"

    @property
    def default_agg_tsv_name(self) -> str:
        return "aggregate-scores-cct.tsv"

    def calculate(
        self,
        executor: Optional[concurrent.futures.Executor] = None,
        export_dir: Optional[str | Path] = None,
        visualize_progress: bool = True,
        display_agg_df: bool = True,
    ) -> pd.DataFrame:
        """See the parent class for the method's docstring."""
        df = super().calculate(
            executor=executor,
            export_dir=export_dir,
            visualize_progress=visualize_progress,
            display_agg_df=display_agg_df,
        )

        if export_dir is not None and self.group_by:
            get_mean_grouping(self.group_by, df, export_dir, "text_extraction")
        return df

    def _validate_inputs(self):
        if not self._document_paths:
            logger.info("No output files to calculate edit distances for, exiting")
            sys.exit(0)
        if self.document_type not in OUTPUT_TYPE_OPTIONS:
            raise ValueError(
                "Specified file type under `documents_dir` or `document_paths` should be one of "
                f"`json` or `txt`. The given file type is {self.document_type}, exiting."
            )
        for path in self._document_paths:
            if not path.suffixes:
                logger.error(f"File {path} does not have a suffix, skipping")
        if not all(
            path.suffixes and path.suffixes[-1] == f".{self.document_type}"
            for path in self._document_paths
        ):
            logger.warning(
                "The directory contains file types inconsistent with the given input. "
                "Please note that some files will be skipped."
            )

    def _process_document(self, doc: Path) -> Optional[list]:
        filename = doc.stem
        doctype = doc.suffixes[0]
        connector = doc.parts[0] if len(doc.parts) > 1 else None

        output_cct, source_cct = self._get_ccts(doc)
        # NOTE(amadeusz): Levenshtein distance calculation takes too long,
        # so skip it if the file sizes differ wildly
        if 0.5 < len(output_cct.encode()) / len(source_cct.encode()) < 2.0:
            accuracy = round(calculate_accuracy(output_cct, source_cct, self.weights), 3)
        else:
            # 0.01 marks that the value was assigned manually rather than computed
            accuracy = 0.01
        percent_missing = round(calculate_percent_missing_text(output_cct, source_cct), 3)
        return [filename, doctype, connector, accuracy, percent_missing]

    def _get_ccts(self, doc: Path) -> tuple[str, str]:
        output_cct = _prepare_output_cct(
            docpath=self.documents_dir / doc, output_type=self.document_type
        )
        source_cct = _read_text_file(self.ground_truths_dir / doc.with_suffix(".txt"))

        return output_cct, source_cct

    def _generate_dataframes(self, rows):
        headers = ["filename", "doctype", "connector", "cct-accuracy", "cct-%missing"]
        df = pd.DataFrame(rows, columns=headers)

        acc = df[["cct-accuracy"]].agg([_mean, _stdev, _pstdev, _count]).transpose()
        miss = df[["cct-%missing"]].agg([_mean, _stdev, _pstdev, _count]).transpose()
        if acc.shape[1] == 0 and miss.shape[1] == 0:
            agg_df = pd.DataFrame(columns=AGG_HEADERS)
        else:
            agg_df = pd.concat((acc, miss)).reset_index()
            agg_df.columns = AGG_HEADERS

        return df, agg_df


@dataclass
class ElementTypeMetricsCalculator(BaseMetricsCalculator):
    """
    Calculates element type frequency accuracy between document and ground truth,
    and the aggregated accuracy across documents.
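
    Example usage (an illustrative sketch; the directory names are placeholders):

        calc = ElementTypeMetricsCalculator(
            documents_dir="structured-output", ground_truths_dir="element-type-gt"
        )
        df = calc.calculate(export_dir="metrics", display_agg_df=True)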
    """

    group_by: Optional[str] = None

    def calculate(
        self,
        executor: Optional[concurrent.futures.Executor] = None,
        export_dir: Optional[str | Path] = None,
        visualize_progress: bool = True,
        display_agg_df: bool = False,
    ) -> pd.DataFrame:
        """See the parent class for the method's docstring."""
        df = super().calculate(
            executor=executor,
            export_dir=export_dir,
            visualize_progress=visualize_progress,
            display_agg_df=display_agg_df,
        )

        if export_dir is not None and self.group_by:
            get_mean_grouping(self.group_by, df, export_dir, "element_type")
        return df

    @property
    def default_tsv_name(self) -> str:
        return "all-docs-element-type-frequency.tsv"

    @property
    def default_agg_tsv_name(self) -> str:
        return "aggregate-scores-element-type.tsv"

    def _process_document(self, doc: Path) -> Optional[list]:
        filename = doc.stem
        doctype = doc.suffixes[0]
        connector = doc.parts[0] if len(doc.parts) > 1 else None

        output = get_element_type_frequency(_read_text_file(self.documents_dir / doc))
        source = get_element_type_frequency(
            _read_text_file(self.ground_truths_dir / doc.with_suffix(".json"))
        )
        accuracy = round(calculate_element_type_percent_match(output, source), 3)
        return [filename, doctype, connector, accuracy]

    def _generate_dataframes(self, rows):
        headers = ["filename", "doctype", "connector", "element-type-accuracy"]
        df = pd.DataFrame(rows, columns=headers)
        if df.empty:
            agg_df = pd.DataFrame(["element-type-accuracy", None, None, None, 0]).transpose()
        else:
            agg_df = df.agg({"element-type-accuracy": [_mean, _stdev, _pstdev, _count]}).transpose()
            agg_df = agg_df.reset_index()

        agg_df.columns = AGG_HEADERS

        return df, agg_df


def get_mean_grouping(
    group_by: str,
    data_input: Union[pd.DataFrame, str],
    export_dir: str,
    eval_name: str,
    agg_name: Optional[str] = None,
    export_filename: Optional[str] = None,
) -> None:
    """Aggregates accuracy and missing metrics by column name 'doctype' or 'connector',
    or 'all' for all rows. Export to TSV.
    If `all`, passing export_name is recommended.

    Args:
        group_by (str): Grouping category ('doctype' or 'connector' or 'all').
        data_input (Union[pd.DataFrame, str]): DataFrame or path to a CSV/TSV file.
        export_dir (str): Directory for the exported TSV file.
        eval_name (str): Evaluated metric ('text_extraction' or 'element_type').
        agg_name (str, optional): String to use in the export filename. Defaults to `cct`
            for eval_name `text_extraction` and `element-type` for `element_type`.
        export_filename (str, optional): Export filename.
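
    Example (an illustrative sketch; the input path is a placeholder pointing at a
    per-document export such as the default `all-docs-cct.tsv`):

        get_mean_grouping(
            group_by="doctype",
            data_input="metrics/all-docs-cct.tsv",
            export_dir="metrics",
            eval_name="text_extraction",
        )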
    """
    if group_by not in ("doctype", "connector", "all"):
        raise ValueError("Invalid grouping category. Expected `doctype`, `connector`, or `all`.")

    if eval_name == "text_extraction":
        agg_fields = ["cct-accuracy", "cct-%missing"]
        agg_name = "cct"
    elif eval_name == "element_type":
        agg_fields = ["element-type-accuracy"]
        agg_name = "element-type"
    elif eval_name == "object_detection":
        agg_fields = ["f1_score", "m_ap"]
        agg_name = "object-detection"
    else:
        raise ValueError(
            f"Unknown metric for eval {eval_name}. "
            f"Expected `text_extraction` or `element_type` or `table_extraction`."
        )

    if isinstance(data_input, str):
        if not os.path.exists(data_input):
            raise FileNotFoundError(f"File {data_input} not found.")
        if data_input.endswith(".csv"):
            df = pd.read_csv(data_input, header=None)
        elif data_input.endswith(".tsv"):
            df = pd.read_csv(data_input, sep="\t")
        elif data_input.endswith(".txt"):
            df = pd.read_csv(data_input, sep="\t", header=None)
        else:
            raise ValueError("Please provide a .csv or .tsv file.")
    else:
        df = data_input

    if df.empty:
        raise SystemExit("Data is empty. Exiting.")
    elif group_by != "all" and (group_by not in df.columns or df[group_by].isnull().all()):
        raise SystemExit(
            f"Data cannot be aggregated by `{group_by}`."
            f" Check if it's empty or the column is missing/empty."
        )

    grouped_df = []
    if group_by and group_by != "all":
        for field in agg_fields:
            grouped_df.append(
                _rename_aggregated_columns(
                    df.groupby(group_by).agg({field: [_mean, _stdev, _pstdev, _count]})
                )
            )
    if group_by == "all":
        df["grouping_key"] = 0
        for field in agg_fields:
            grouped_df.append(
                _rename_aggregated_columns(
                    df.groupby("grouping_key").agg({field: [_mean, _stdev, _pstdev, _count]})
                )
            )
    grouped_df = _format_grouping_output(*grouped_df)
    if "grouping_key" in grouped_df.columns.get_level_values(0):
        grouped_df = grouped_df.drop("grouping_key", axis=1, level=0)

    if export_filename:
        if not export_filename.endswith(".tsv"):
            export_filename = export_filename + ".tsv"
        _write_to_file(export_dir, export_filename, grouped_df)
    else:
        _write_to_file(export_dir, f"all-{group_by}-agg-{agg_name}.tsv", grouped_df)


def filter_metrics(
    data_input: Union[str, pd.DataFrame],
    filter_list: Union[str, List[str]],
    filter_by: str = "filename",
    export_filename: Optional[str] = None,
    export_dir: str = "metrics",
    return_type: str = "file",
) -> Optional[pd.DataFrame]:
    """Reads the data_input file and filter only selected row available in filter_list.

    Args:
        data_input (str, dataframe): the source data, path to file or dataframe
        filter_list (str, list): the filter, path to file or list of string
        filter_by (str): the data_input column to match against filter_list
        export_filename (str, optional): export filename. required when return_type is "file"
        export_dir (str, optional): export directory. Defaults to <current directory>/metrics
        return_type (str): "file" or "dataframe"
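
    Example (an illustrative sketch; the paths and filenames are placeholders):

        filter_metrics(
            data_input="metrics/all-docs-cct.tsv",
            filter_list=["example-1.pdf", "example-2.pdf"],
            filter_by="filename",
            export_filename="filtered-cct.tsv",
            export_dir="metrics",
        )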
    """
    if isinstance(data_input, str):
        if not os.path.exists(data_input):
            raise FileNotFoundError(f"File {data_input} not found.")
        if data_input.endswith(".csv"):
            df = pd.read_csv(data_input, header=None)
        elif data_input.endswith(".tsv"):
            df = pd.read_csv(data_input, sep="\t")
        elif data_input.endswith(".txt"):
            df = pd.read_csv(data_input, sep="\t", header=None)
        else:
            raise ValueError("Please provide a .csv or .tsv file.")
    else:
        df = data_input

    if isinstance(filter_list, str):
        if not os.path.exists(filter_list):
            raise FileNotFoundError(f"File {filter_list} not found.")
        if filter_list.endswith(".csv"):
            filter_df = pd.read_csv(filter_list, header=None)
        elif filter_list.endswith(".tsv"):
            filter_df = pd.read_csv(filter_list, sep="\t")
        elif filter_list.endswith(".txt"):
            filter_df = pd.read_csv(filter_list, sep="\t", header=None)
        else:
            raise ValueError("Please provide a .csv or .tsv file.")
        filter_list = filter_df.iloc[:, 0].astype(str).values.tolist()
    elif not isinstance(filter_list, list):
        raise ValueError("Please provide a List of strings or path to file.")

    if filter_by not in df.columns:
        raise ValueError("`filter_by` key does not exists in the data provided.")

    res = df[df[filter_by].isin(filter_list)]

    if res.empty:
        raise SystemExit("No common file names between data_input and filter_list. Exiting.")

    if return_type == "dataframe":
        return res
    elif return_type == "file" and export_filename:
        _write_to_file(export_dir, export_filename, res)
    elif return_type == "file" and not export_filename:
        raise ValueError("Please provide `export_filename`.")
    else:
        raise ValueError("Return type must be either `dataframe` or `file`.")


@dataclass
class ObjectDetectionMetricsCalculatorBase(BaseMetricsCalculator, ABC):
    """
    Calculates object detection metrics for each document:
    - f1 score
    - precision
    - recall
    - mean average precision (mAP)
    It also calculates aggregated metrics.
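
    Example usage (an illustrative sketch; the paths are placeholders and `documents_dir`
    must contain the `analysis/<document_name>/layout_dump/object_detection.json` layout
    described in `_get_paths`):

        calc = ObjectDetectionAggregatedMetricsCalculator(
            documents_dir="analysis-root", ground_truths_dir="dataset/od_gt"
        )
        df = calc.calculate(export_dir="metrics")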
    """

    def __post_init__(self):
        super().__post_init__()
        self._document_paths = [
            path.relative_to(self.documents_dir)
            for path in self.documents_dir.rglob("analysis/*/layout_dump/object_detection.json")
            if path.is_file()
        ]

    @property
    def supported_metric_names(self):
        return ["f1_score", "precision", "recall", "m_ap"]

    @property
    def default_tsv_name(self):
        return "all-docs-object-detection-metrics.tsv"

    @property
    def default_agg_tsv_name(self):
        return "aggregate-object-detection-metrics.tsv"

    def _find_file_in_ground_truth(self, file_stem: str) -> Optional[Path]:
        """Find the file corresponding to OD model dump file among the set of ground truth files

        The files in ground truth paths keep the original extension and have .json suffix added,
        e.g.:
        some_document.pdf.json
        poster.jpg.json

        To compare against `file_stem` we need the prefix part of the file name, so the stem
        is taken twice (a double stem).
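
        For example, a dump under `analysis/poster/layout_dump/object_detection.json`
        yields `file_stem == "poster"` and matches the ground truth file `poster.jpg.json`:
        `Path("poster.jpg.json").stem` is `"poster.jpg"` and taking the stem again gives
        `"poster"`.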
        """
        for path in self._ground_truth_paths:
            if Path(path.stem).stem == file_stem:
                return path
        return None

    def _get_paths(self, doc: Path) -> tuple[str, Path, Path]:
        """Resolves ground doctype, prediction file path and ground truth path.

        As the OD dump directory structure differs from the other, simpler outputs, specific
        processing is needed to match the output OD dump file with the corresponding
        OD GT file.

        The outputs are placed in a directory structure:

        analysis
        |- document_name
            |- layout_dump
                |- object_detection.json
            |- bboxes # not used in this evaluation

        and the GT file is placed in the od_gt directory for a given dataset

        dataset_name
        |- od_gt
            |- document_name.pdf.json

        Args:
            doc (Path): path to the OD dump file

        Returns:
            tuple: doctype, prediction file path, ground truth path
        """
        od_dump_path = Path(doc)
        file_stem = od_dump_path.parts[-3]  # we take the `document_name` - so the filename stem

        src_gt_filename = self._find_file_in_ground_truth(file_stem)

        if src_gt_filename is None:
            raise ValueError(f"Ground truth file for {file_stem} not found in list of GT files")

        doctype = Path(src_gt_filename.stem).suffix[1:]

        prediction_file = self.documents_dir / doc
        if not prediction_file.exists():
            logger.warning(f"Prediction file {prediction_file} does not exist, skipping")
            raise ValueError(f"Prediction file {prediction_file} does not exist")

        ground_truth_file = self.ground_truths_dir / src_gt_filename
        if not ground_truth_file.exists():
            logger.warning(f"Ground truth file {ground_truth_file} does not exist, skipping")
            raise ValueError(f"Ground truth file {ground_truth_file} does not exist")

        return doctype, prediction_file, ground_truth_file

    def _generate_dataframes(self, rows) -> tuple[pd.DataFrame, pd.DataFrame]:
        headers = ["filename", "doctype", "connector"] + self.supported_metric_names
        df = pd.DataFrame(rows, columns=headers)

        if df.empty:
            agg_df = pd.DataFrame(columns=AGG_HEADERS)
        else:
            element_metrics_results = {}
            for metric in self.supported_metric_names:
                metric_df = df[df[metric].notnull()]
                agg_metric = metric_df[metric].agg([_mean, _stdev, _pstdev, _count]).transpose()
                if agg_metric.empty:
                    element_metrics_results[metric] = pd.Series(
                        data=[None, None, None, 0], index=["_mean", "_stdev", "_pstdev", "_count"]
                    )
                else:
                    element_metrics_results[metric] = agg_metric
            agg_df = pd.DataFrame(element_metrics_results).transpose().reset_index()
        agg_df.columns = AGG_HEADERS

        return df, agg_df


class ObjectDetectionPerClassMetricsCalculator(ObjectDetectionMetricsCalculatorBase):
    """Calculates object detection metrics (f1 score, precision, recall, mAP) per element
    class for each document."""

    def __post_init__(self):
        super().__post_init__()
        self.per_class_metric_names: list[str] | None = None
        self._set_supported_metrics()

    @property
    def supported_metric_names(self):
        if self.per_class_metric_names:
            return self.per_class_metric_names
        else:
            raise ValueError("per_class_metrics not initialized - cannot get class names")

    @property
    def default_tsv_name(self):
        return "all-docs-object-detection-metrics-per-class.tsv"

    @property
    def default_agg_tsv_name(self):
        return "aggregate-object-detection-metrics-per-class.tsv"

    def _process_document(self, doc: Path) -> Optional[list]:
        """Calculate both class-aggregated and per-class metrics for a single document.

        Args:
            doc (Path): path to the OD dump file

        Returns:
            list: the per-class metrics row for a single document
        """
        try:
            doctype, prediction_file, ground_truth_file = self._get_paths(doc)
        except ValueError as e:
            logger.error(f"Failed to process document {doc}: {e}")
            return None

        processor = ObjectDetectionEvalProcessor.from_json_files(
            prediction_file_path=prediction_file,
            ground_truth_file_path=ground_truth_file,
        )
        _, per_class_metrics = processor.get_metrics()

        per_class_metrics_row = [
            ground_truth_file.stem,
            doctype,
            None,  # connector
        ]

        for combined_metric_name in self.supported_metric_names:
            metric = "_".join(combined_metric_name.split("_")[:-1])
            class_name = combined_metric_name.split("_")[-1]
            class_metrics = getattr(per_class_metrics, metric)
            per_class_metrics_row.append(class_metrics[class_name])
        return per_class_metrics_row

    def _set_supported_metrics(self):
        """Sets the supported metrics based on the classes found in the ground truth files.
        The difference from the aggregated calculator is that the list of classes (and thus
        the metric names) is derived from the contents of the ground truth files.
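
        For example, if the ground truth files declare the (hypothetical) classes `Table`
        and `Picture`, the resulting metric names include `f1_score_Picture`,
        `f1_score_Table`, `m_ap_Picture`, `m_ap_Table`, and so on.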
        """
        metrics = ["f1_score", "precision", "recall", "m_ap"]
        classes = set()
        for gt_file in self._ground_truth_paths:
            gt_file_path = self.ground_truths_dir / gt_file
            with open(gt_file_path) as f:
                gt = json.load(f)
                gt_classes = gt["object_detection_classes"]
                classes.update(gt_classes)
        per_class_metric_names = []
        for metric in metrics:
            for class_name in classes:
                per_class_metric_names.append(f"{metric}_{class_name}")
        self.per_class_metric_names = sorted(per_class_metric_names)


class ObjectDetectionAggregatedMetricsCalculator(ObjectDetectionMetricsCalculatorBase):
    """Calculates object detection metrics for each document and aggregates by all classes"""

    @property
    def supported_metric_names(self):
        return ["f1_score", "precision", "recall", "m_ap"]

    @property
    def default_tsv_name(self):
        return "all-docs-object-detection-metrics.tsv"

    @property
    def default_agg_tsv_name(self):
        return "aggregate-object-detection-metrics.tsv"

    def _process_document(self, doc: Path) -> Optional[list]:
        """Calculate both class-aggregated and per-class metrics for a single document.

        Args:
            doc (Path): path to the OD dump file

        Returns:
            list: a list of aggregated metrics for a single document
        """
        try:
            doctype, prediction_file, ground_truth_file = self._get_paths(doc)
        except ValueError as e:
            logger.error(f"Failed to process document {doc}: {e}")
            return None

        processor = ObjectDetectionEvalProcessor.from_json_files(
            prediction_file_path=prediction_file,
            ground_truth_file_path=ground_truth_file,
        )
        metrics, _ = processor.get_metrics()

        return [
            ground_truth_file.stem,
            doctype,
            None,  # connector
        ] + [getattr(metrics, metric) for metric in self.supported_metric_names]
