/*
 * Decompiled with CFR 0.152.
 */
package ai.djl.basicdataset;

import ai.djl.Application;
import ai.djl.basicdataset.BasicDatasets;
import ai.djl.basicdataset.TextDataset;
import ai.djl.modality.nlp.embedding.EmbeddingException;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.Artifact;
import ai.djl.repository.MRL;
import ai.djl.repository.Repository;
import ai.djl.repository.dataset.ZooDataset;
import ai.djl.training.dataset.Dataset;
import ai.djl.training.dataset.Record;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;

/**
 * A {@link TextDataset} backed by the {@code tatoeba-en-fr} artifact.
 *
 * <p>Each usable line of the data file holds a sentence pair separated by a tab character: the
 * first column is used as the source sentence and the second column as the target sentence. Any
 * further columns are ignored.
 *
 * <p>Only {@link Dataset.Usage#TRAIN} and {@link Dataset.Usage#TEST} splits are available.
 */
public class TatoebaEnglishFrenchDataset extends TextDataset implements ZooDataset {
    private static final String VERSION = "1.0";
    private static final String ARTIFACT_ID = "tatoeba-en-fr";

    private final Repository repository;
    /** Not final: replaced by {@link #useDefaultArtifact()}. */
    private Artifact artifact;
    private final Dataset.Usage usage;
    private boolean prepared;

    /**
     * Creates a dataset from the values collected by the given builder.
     *
     * @param builder the builder supplying the repository, artifact and usage
     */
    protected TatoebaEnglishFrenchDataset(Builder builder) {
        super(builder);
        this.repository = builder.repository;
        this.artifact = builder.artifact;
        this.usage = builder.usage;
    }

    /**
     * Creates a new builder for this dataset.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Returns the resource locator of this dataset.
     *
     * @return the {@link MRL} for the machine-translation application, group
     *     {@code ai.djl.basicdataset} and artifact id {@code tatoeba-en-fr}
     */
    public MRL getMrl() {
        return MRL.dataset(Application.NLP.MACHINE_TRANSLATION, "ai.djl.basicdataset", ARTIFACT_ID);
    }

    /**
     * Returns the repository this dataset is loaded from.
     *
     * @return the repository
     */
    public Repository getRepository() {
        return this.repository;
    }

    /**
     * Returns the artifact currently associated with this dataset.
     *
     * @return the artifact; {@code null} if none was set on the builder and
     *     {@link #useDefaultArtifact()} has not been called
     */
    public Artifact getArtifact() {
        return this.artifact;
    }

    /**
     * Returns the usage this dataset was built for.
     *
     * @return the usage
     */
    public Dataset.Usage getUsage() {
        return this.usage;
    }

    /**
     * Returns the value of the prepared flag.
     *
     * @return the flag last set through {@link #setPrepared(boolean)}
     */
    public boolean isPrepared() {
        return this.prepared;
    }

    /**
     * Sets the prepared flag.
     *
     * @param prepared the new flag value
     */
    public void setPrepared(boolean prepared) {
        this.prepared = prepared;
    }

    /**
     * Resolves version {@code 1.0} of this dataset's {@link MRL} in the repository, without a
     * filter, and stores the result as the current artifact.
     *
     * @throws IOException if the repository fails to resolve the artifact
     */
    public void useDefaultArtifact() throws IOException {
        this.artifact = this.repository.resolve(this.getMrl(), VERSION, null);
    }

    /**
     * Reads the sentence pairs for the given usage and hands them to
     * {@code preprocess}.
     *
     * <p>Blank lines are skipped. A non-blank line that does not contain two tab-separated
     * columns is reported as an {@link IOException} that names the file and the line number.
     *
     * @param usage the split to load; only {@code TRAIN} and {@code TEST} are supported
     * @throws IOException if the file cannot be read, contains a malformed row, or preprocessing
     *     fails with an {@link EmbeddingException}
     * @throws UnsupportedOperationException if {@code usage} is neither {@code TRAIN} nor
     *     {@code TEST}
     */
    public void prepareData(Dataset.Usage usage) throws IOException {
        Path root = this.repository.getResourceDirectory(this.artifact);
        Path usagePath;
        switch (usage) {
            case TRAIN: {
                usagePath = root.resolve("fra-eng-train.txt");
                break;
            }
            case TEST: {
                usagePath = root.resolve("fra-eng-test.txt");
                break;
            }
            default: {
                throw new UnsupportedOperationException("Validation data not available.");
            }
        }

        ArrayList<String> sourceSentences = new ArrayList<>();
        ArrayList<String> targetSentences = new ArrayList<>();
        try (BufferedReader reader = Files.newBufferedReader(usagePath)) {
            String row;
            int lineNumber = 0;
            while ((row = reader.readLine()) != null) {
                ++lineNumber;
                if (row.trim().isEmpty()) {
                    // Tolerate blank lines (e.g. a trailing newline at the end of the file).
                    continue;
                }
                String[] text = row.split("\t");
                if (text.length < 2) {
                    // Fail with context instead of an ArrayIndexOutOfBoundsException.
                    throw new IOException(
                            "Malformed row at " + usagePath + ":" + lineNumber
                                    + " - expected two tab-separated columns");
                }
                sourceSentences.add(text[0]);
                targetSentences.add(text[1]);
            }
        }

        try {
            // The boolean tells preprocess whether the list is the source (true) or target (false).
            this.preprocess(sourceSentences, true);
            this.preprocess(targetSentences, false);
        }
        catch (EmbeddingException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Builds the record at the given index.
     *
     * @param manager the manager passed to the embedding lookup
     * @param index the index of the sentence pair
     * @return a record whose data holds the source embedding and whose labels hold the target
     *     embedding for {@code index}
     */
    public Record get(NDManager manager, long index) {
        NDList data = new NDList();
        NDList labels = new NDList();
        data.add(this.sourceTextData.getEmbedding(manager, index));
        labels.add(this.targetTextData.getEmbedding(manager, index));
        return new Record(data, labels);
    }

    /**
     * Returns the number of available entries.
     *
     * @return the size reported by the source text data
     */
    protected long availableSize() {
        return this.sourceTextData.getSize();
    }

    /**
     * Builder for {@link TatoebaEnglishFrenchDataset}.
     *
     * <p>Defaults: repository {@code BasicDatasets.REPOSITORY}, usage {@code TRAIN}, no artifact.
     */
    public static class Builder extends TextDataset.Builder<Builder> {
        private Repository repository = BasicDatasets.REPOSITORY;
        private Artifact artifact;
        private Dataset.Usage usage = Dataset.Usage.TRAIN;

        /** Creates a builder with the default settings; use {@code builder()} from outside the package. */
        Builder() {
        }

        /**
         * Returns this builder.
         *
         * @return this builder
         */
        public Builder self() {
            return this;
        }

        /**
         * Sets the usage (split) of the dataset.
         *
         * @param usage the usage to load
         * @return this builder
         */
        public Builder optUsage(Dataset.Usage usage) {
            this.usage = usage;
            return this.self();
        }

        /**
         * Sets the repository the dataset is loaded from.
         *
         * @param repository the repository
         * @return this builder
         */
        public Builder optRepository(Repository repository) {
            this.repository = repository;
            return this.self();
        }

        /**
         * Sets the artifact to use.
         *
         * @param artifact the artifact
         * @return this builder
         */
        public Builder optArtifact(Artifact artifact) {
            this.artifact = artifact;
            return this.self();
        }

        /**
         * Builds the dataset from the current settings.
         *
         * @return a new {@link TatoebaEnglishFrenchDataset}
         */
        public TatoebaEnglishFrenchDataset build() {
            return new TatoebaEnglishFrenchDataset(this);
        }
    }
}

