from __future__ import annotations

import gzip

from . import InputExample


class PairedFilesReader:
    """Reads in the a Pair Dataset, split in two files"""

    def __init__(self, filepaths):
        self.filepaths = filepaths

    def get_examples(self, max_examples=0):
        fIns = []
        for filepath in self.filepaths:
            fIn = (
                gzip.open(filepath, "rt", encoding="utf-8")
                if filepath.endswith(".gz")
                else open(filepath, encoding="utf-8")
            )
            fIns.append(fIn)

        examples = []

        eof = False
        while not eof:
            texts = []
            for fIn in fIns:
                text = fIn.readline()

                if text == "":
                    eof = True
                    break

                texts.append(text)

            if eof:
                break

            examples.append(InputExample(guid=str(len(examples)), texts=texts, label=1))
            if max_examples > 0 and len(examples) >= max_examples:
                break

        return examples
