/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.eval;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.language.AmericanEnglish;
import org.languagetool.language.BritishEnglish;
import org.languagetool.language.English;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.ConfusionPair;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.en.EnglishConfusionProbabilityRule;
import org.languagetool.rules.ngrams.ConfusionProbabilityRule;

/**
 * Runs the English confusion probability rule (and only that rule) over the
 * sentences of every {@code *.txt} file in a directory and prints how many
 * matches the rule produced, per file and in total.
 *
 * <p>The file name without {@code .txt} is used as the key to look up the
 * confusion pair in {@code /en/confusion_sets.txt}; files whose name has no
 * entry there are skipped. Every match is reported as an "error" in the
 * output (presumably the input sentences are expected to be correct, so each
 * match is a false alarm, as the class name suggests).
 */
class RealWordFalseAlarmEvaluator {
    /** Maximum number of sentences checked per input file. */
    private static final int MAX_SENTENCES = 1000;
    /** Matching sentences are only printed while the per-file match count does not exceed this value. */
    private static final int MAX_ERROR_DISPLAY = 50;

    private final JLanguageTool lt;
    private final ConfusionProbabilityRule confusionRule;
    private final Map<String, List<ConfusionPair>> confusionPairs;
    private final LanguageModel languageModel;
    private int globalSentenceCount;
    private int globalRuleMatches;

    /**
     * Loads the English confusion pairs, creates a checker with all of its
     * default rules disabled and registers the confusion probability rule as
     * the only active rule.
     *
     * @param languageModelIndexDir directory handed to {@link LuceneLanguageModel}
     * @throws IOException declared because loading the confusion pairs or opening
     *         the language model may fail with an I/O error
     */
    RealWordFalseAlarmEvaluator(File languageModelIndexDir) throws IOException {
        Language lang = AmericanEnglish.getInstance();
        try (InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/confusion_sets.txt")) {
            ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader(lang);
            this.confusionPairs = confusionSetLoader.loadConfusionPairs(inputStream);
        }
        // NOTE(review): the checker is British English while the confusion pairs and
        // the rule use American English - kept as found, confirm this is intended.
        this.lt = new JLanguageTool(BritishEnglish.getInstance());
        List<Rule> rules = this.lt.getAllActiveRules();
        for (Rule rule : rules) {
            this.lt.disableRule(rule.getId());
        }
        this.languageModel = new LuceneLanguageModel(languageModelIndexDir);
        this.confusionRule = new EnglishConfusionProbabilityRule(JLanguageTool.getMessageBundle(), this.languageModel, lang);
        this.lt.addRule(this.confusionRule);
    }

    /** Closes the underlying language model. */
    void close() {
        this.languageModel.close();
    }

    /**
     * Checks every {@code *.txt} file directly inside {@code dir} and prints
     * per-file and overall statistics to standard output.
     *
     * @param dir directory containing the sentence files
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    void run(File dir) throws IOException {
        System.out.println("Running in eval mode, no 'DATA' lines will be printed, only a subset of the homophones will be used.");
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null for non-directories and on I/O errors
            throw new IOException("Could not list files of directory: " + dir);
        }
        int fileCount = 1;
        for (File file : files) {
            if (!file.getName().endsWith(".txt")) {
                System.out.println("Ignoring " + file + ", does not match *.txt");
                continue;
            }
            try (FileInputStream fis = new FileInputStream(file)) {
                System.out.println("===== Working on " + file.getName() + " (" + fileCount + "/" + files.length + ") =====");
                // Decode explicitly instead of relying on the platform default charset
                // (assumes the sentence files are UTF-8).
                List<String> lines = IOUtils.readLines(fis, StandardCharsets.UTF_8);
                this.checkLines(lines, file.getName().replace(".txt", ""));
                ++fileCount;
            }
        }
        System.out.println("==============================");
        System.out.println(this.globalSentenceCount + " sentences checked");
        System.out.println(this.globalRuleMatches + " errors found");
        System.out.printf("%.2f%% of sentences have a match\n", percentage(this.globalRuleMatches, this.globalSentenceCount));
    }

    /**
     * Checks at most {@link #MAX_SENTENCES} lines against the first confusion
     * pair registered under {@code name} and prints the statistics for them.
     *
     * @param lines the sentences to check, one per element
     * @param name key used to look up the confusion pair; also used as output label
     * @throws IOException if checking a sentence fails
     */
    private void checkLines(List<String> lines, String name) throws IOException {
        List<ConfusionPair> subConfusionPair = this.confusionPairs.get(name);
        if (subConfusionPair == null) {
            System.out.println("Skipping '" + name + "', homophone not loaded");
            return;
        }
        if (subConfusionPair.size() > 1) {
            System.err.println("WARN: will only use first confusion set of " + subConfusionPair.size() + ": " + subConfusionPair.get(0));
        }
        this.confusionRule.setConfusionPair(subConfusionPair.get(0));
        int sentenceCount = 0;
        int ruleMatches = 0;
        for (String line : lines) {
            // Test the limit before checking so that exactly MAX_SENTENCES sentences are processed.
            if (sentenceCount >= MAX_SENTENCES) {
                System.out.println("Max sentences (" + MAX_SENTENCES + ") reached, stopping");
                break;
            }
            List<RuleMatch> matches = this.lt.check(line);
            ++sentenceCount;
            ++this.globalSentenceCount;
            if (!matches.isEmpty()) {
                HashSet<String> suggestions = new HashSet<>();
                for (RuleMatch match : matches) {
                    suggestions.addAll(match.getSuggestedReplacements());
                    ++ruleMatches;
                    ++this.globalRuleMatches;
                }
                // Keep the output readable: stop printing sentences once many matches were seen.
                if (ruleMatches <= MAX_ERROR_DISPLAY) {
                    System.out.println("[" + name + "] " + line + " => " + suggestions);
                }
            }
        }
        System.out.println(sentenceCount + " sentences checked");
        System.out.println(ruleMatches + " errors found");
        System.out.printf("%.2f%% of sentences have a match\n", percentage(ruleMatches, sentenceCount));
    }

    /**
     * Returns {@code part / total} as a percentage, or 0 when {@code total} is 0
     * (avoids printing NaN when no sentence was checked).
     */
    private static float percentage(int part, int total) {
        if (total == 0) {
            return 0.0f;
        }
        return (float) part / (float) total * 100.0f;
    }

    /**
     * Command line entry point.
     *
     * @param args {@code <languageModel> <sentenceDirectory>}
     * @throws IOException if the evaluation fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.out.println("Usage: " + RealWordFalseAlarmEvaluator.class.getSimpleName() + " <languageModel> <sentenceDirectory>");
            System.out.println("   <languageModel> is a Lucene index with ngram frequency information");
            System.out.println("   <sentenceDirectory> is a directory with filenames like 'xx.txt' where 'xx' is the homophone");
            System.exit(1);
        }
        // Validate the directory first so that no language model is opened (and leaked) for bad input.
        File dir = new File(args[1]);
        if (!dir.isDirectory()) {
            throw new RuntimeException("Not a directory: " + dir);
        }
        RealWordFalseAlarmEvaluator evaluator = new RealWordFalseAlarmEvaluator(new File(args[0]));
        try {
            evaluator.run(dir);
        } finally {
            // Release the language model even if the evaluation fails.
            evaluator.close();
        }
    }
}

