/*
 * Decompiled with CFR 0.152.
 */
package com.yahoo.vespasignificance;

import ai.vespa.vespasignificance.generate.FormatStrategy;
import ai.vespa.vespasignificance.generate.JsonlDocumentFormatStrategy;
import ai.vespa.vespasignificance.generate.VstsvFormatStrategy;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.yahoo.language.Language;
import com.yahoo.language.opennlp.OpenNlpLinguistics;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.significance.impl.DocumentFrequencyFile;
import com.yahoo.language.significance.impl.SignificanceModelFile;
import com.yahoo.vespasignificance.ClientParameters;
import com.yahoo.vespasignificance.CommandLineOptions;
import io.airlift.compress.zstd.ZstdInputStream;
import io.airlift.compress.zstd.ZstdOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.stream.Collectors;

/**
 * Builds a significance model file from an input file and writes it as JSON.
 *
 * <p>The input is read through a {@link FormatStrategy} chosen from the
 * {@code format} client parameter ({@code jsonl} or {@code vstsv}). The resulting
 * term document frequencies are stored under the strategy's language key in a
 * {@link SignificanceModelFile}. If the output file already exists it is read and
 * the new language entry is added to it; otherwise a new model file is created.
 * The output is optionally zstd-compressed.
 */
public class SignificanceModelGenerator {
    private static final String VERSION = "1.0";
    private static final String ID = "1";
    private static final String SIGNIFICANCE_DESCRIPTION = "Significance model for input file";
    private static final String DOC_FREQ_DESCRIPTION = "Document frequency for language";
    private static final String DEFAULT_OUTPUT_FILE = "model.json";
    private static final String ZST_EXTENSION = ".zst";

    private final ClientParameters clientParameters;
    private FormatStrategy formatStrategy;
    private ObjectMapper objectMapper;
    private InputFormat format;
    private String outputFile;
    private boolean useZstCompression;

    /**
     * @param clientParameters the parsed command-line parameters driving generation
     */
    public SignificanceModelGenerator(ClientParameters clientParameters) {
        this.clientParameters = clientParameters;
    }

    /**
     * Validates the parameters, reads the input and writes the model file.
     *
     * <p>Problems are reported on {@code System.err} rather than thrown.
     *
     * @return {@code 0} on success, {@code 1} on a validation or I/O failure
     */
    public int run() {
        try {
            this.setFormat();
            if (this.format == InputFormat.jsonl) {
                this.validateJsonlFormatRequiredFields();
            }
            this.resolveAndValidateOutputFile();

            String language = Objects.requireNonNullElse(this.clientParameters.language, "un");
            List<Language> languageKeyParts = Arrays.stream(language.split(","))
                    .map(Language::fromLanguageTag)
                    .collect(Collectors.toList());
            if (languageKeyParts.isEmpty()) {
                // String.split drops trailing empty strings, so input such as "," yields no parts.
                System.err.println("Error: invalid language specified: " + language);
                return 1;
            }
            // The first listed language is the one used for tokenization.
            Language tokenizationLanguage = languageKeyParts.get(0);
            Tokenizer tokenizer = new OpenNlpLinguistics().getTokenizer();

            this.objectMapper = new ObjectMapper();
            this.useZstCompression = this.clientParameters.zstCompression;

            if (this.clientParameters.inputFile == null) {
                // Only the jsonl path validates required options; guard here so other
                // formats do not fail with a NullPointerException from Paths.get.
                System.err.println("Missing required options: in");
                CommandLineOptions.printGenerateHelp();
                return 1;
            }
            Path input = Paths.get(this.clientParameters.inputFile);
            if (!Files.exists(input)) {
                System.err.println("Error: input file " + input + " does not exist");
                return 1;
            }

            switch (this.format) {
                case jsonl:
                    this.formatStrategy = new JsonlDocumentFormatStrategy(
                            input, tokenizer, tokenizationLanguage, languageKeyParts, this.clientParameters.field);
                    break;
                case vstsv:
                    this.formatStrategy = new VstsvFormatStrategy(input);
                    break;
            }
            this.generate();
        }
        catch (GenerateFailure ignored) {
            // The failure has already been reported on System.err by the thrower.
            return 1;
        }
        catch (IOException e) {
            System.err.println("I/O error: " + e.getMessage());
            return 1;
        }
        return 0;
    }

    /**
     * Resolves {@link #format} from the {@code format} client parameter, defaulting
     * to {@code jsonl} when none is given. The name is matched case-insensitively.
     *
     * @throws GenerateFailure if the format name is not a known {@link InputFormat};
     *                         usage help is printed first
     */
    private void setFormat() {
        try {
            String requested = Objects.requireNonNullElse(this.clientParameters.format, InputFormat.jsonl.toString());
            this.format = InputFormat.valueOf(requested.toLowerCase(Locale.ROOT));
        }
        catch (IllegalArgumentException ignored) {
            // valueOf rejects unknown names; report it as a usage error instead.
            System.err.println("Error: invalid format specified: " + this.clientParameters.format);
            CommandLineOptions.printGenerateHelp();
            System.err.println("Use --format FORMAT to specify format.");
            System.err.println("Allowed formats: " + InputFormat.allowed());
            throw new GenerateFailure();
        }
    }

    /**
     * Checks that every option needed for the jsonl format is present.
     *
     * @throws GenerateFailure if any of language, in, out or field is missing;
     *                         the missing options and usage help are printed first
     */
    private void validateJsonlFormatRequiredFields() {
        List<String> missing = new ArrayList<>();
        if (this.clientParameters.language == null) {
            missing.add("language");
        }
        if (this.clientParameters.inputFile == null) {
            missing.add("in");
        }
        if (this.clientParameters.outputFile == null) {
            missing.add("out");
        }
        if (this.clientParameters.field == null) {
            missing.add("field");
        }
        if (!missing.isEmpty()) {
            System.err.println("Missing required options: " + String.join(", ", missing));
            CommandLineOptions.printGenerateHelp();
            throw new GenerateFailure();
        }
    }

    /**
     * Determines the output file name and stores it in {@link #outputFile}.
     *
     * <p>When no output file is given the default is {@code model.json}, with a
     * {@code .zst} suffix appended if zst compression is requested. For the jsonl
     * format the {@code .zst} extension must agree with the compression setting.
     *
     * @throws GenerateFailure if the extension and the compression setting disagree
     *                         for the jsonl format
     */
    private void resolveAndValidateOutputFile() {
        boolean compress = this.clientParameters.zstCompression;
        String resolved;
        if (this.clientParameters.outputFile != null) {
            resolved = this.clientParameters.outputFile;
        } else {
            resolved = compress ? DEFAULT_OUTPUT_FILE + ZST_EXTENSION : DEFAULT_OUTPUT_FILE;
        }

        if (this.format == InputFormat.jsonl) {
            boolean hasZstExtension = resolved.endsWith(ZST_EXTENSION);
            if (compress && !hasZstExtension) {
                System.err.println("Output file must have .zst extension when using zst compression");
                CommandLineOptions.printGenerateHelp();
                throw new GenerateFailure();
            }
            if (!compress && hasZstExtension) {
                System.err.println("Output file must not have .zst extension when not using zst compression");
                CommandLineOptions.printGenerateHelp();
                throw new GenerateFailure();
            }
        }
        this.outputFile = resolved;
    }

    /**
     * Runs the format strategy, keeps the terms whose document frequency is greater
     * than one, and writes the model to the output file.
     *
     * <p>An existing output file is read first (as zstd if its name ends in
     * {@code .zst}) and the new language entry is added to it.
     *
     * @throws IOException if the existing model cannot be read or the model cannot be written
     */
    private void generate() throws IOException {
        FormatStrategy.Result result = this.formatStrategy.build();
        SortedMap<String, Long> allFrequencies = result.termDf();
        long documentCount = result.documentCount();
        String languagesKey = this.formatStrategy.languageKey();

        // Terms seen in only one document are dropped; TreeMap keeps the output sorted by term.
        Map<String, Long> frequencies = allFrequencies.entrySet().stream()
                .filter(entry -> entry.getValue() > 1L)
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> a, TreeMap::new));
        System.out.println("Total documents processed: " + documentCount + ", unique words: " + frequencies.size());

        DocumentFrequencyFile documentFrequency =
                new DocumentFrequencyFile(DOC_FREQ_DESCRIPTION, documentCount, frequencies);

        File target = Paths.get(this.outputFile).toFile();
        SignificanceModelFile modelFile;
        if (target.exists()) {
            boolean existingIsZst = target.toString().endsWith(ZST_EXTENSION);
            // The raw stream is its own resource so it is closed even if the zstd wrapper fails to construct.
            try (InputStream raw = new FileInputStream(target);
                 InputStream in = existingIsZst ? new ZstdInputStream(raw) : raw) {
                modelFile = this.objectMapper.readValue(in, SignificanceModelFile.class);
            }
            modelFile.addLanguage(languagesKey, documentFrequency);
        } else {
            HashMap<String, DocumentFrequencyFile> languages = new HashMap<>();
            languages.put(languagesKey, documentFrequency);
            modelFile = new SignificanceModelFile(
                    VERSION, ID, SIGNIFICANCE_DESCRIPTION + " " + this.clientParameters.inputFile, languages);
        }

        // Resources close in reverse order: the (possibly zstd) wrapper first, then the raw file stream.
        try (OutputStream raw = new FileOutputStream(target);
             OutputStream out = this.useZstCompression ? new ZstdOutputStream(raw) : raw) {
            ObjectWriter writer = this.objectMapper.writerWithDefaultPrettyPrinter();
            writer.writeValue(out, modelFile);
        }
    }

    /** Input formats accepted by the generator; constant names match the command-line values. */
    private enum InputFormat {
        jsonl,
        vstsv;

        /**
         * @return the lower-cased format names joined by {@code ", "}, in declaration order
         */
        private static String allowed() {
            return Arrays.stream(InputFormat.values())
                    .map(f -> f.name().toLowerCase(Locale.ROOT))
                    .collect(Collectors.joining(", "));
        }
    }

    /**
     * Signals that generation was aborted after an error message was already printed;
     * {@link #run()} maps it to exit code {@code 1}.
     */
    static final class GenerateFailure
    extends RuntimeException {
        GenerateFailure() {
        }
    }
}

