/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.bigdata;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.languagetool.AnalyzedSentence;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.chunking.Chunker;
import org.languagetool.dev.bigdata.RuleEvalResult;
import org.languagetool.dev.bigdata.RuleEvalValues;
import org.languagetool.dev.dumpcheck.MixingSentenceSource;
import org.languagetool.dev.dumpcheck.PlainTextSentenceSource;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.dev.dumpcheck.SentenceSource;
import org.languagetool.dev.eval.FMeasure;
import org.languagetool.language.English;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.ConfusionPair;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.ngrams.ConfusionProbabilityRule;
import org.languagetool.tagging.Tagger;
import org.languagetool.tagging.xx.DemoTagger;
import org.languagetool.tools.StringTools;

class ConfusionRuleEvaluator {
    /** Case-sensitivity setting for command-line runs; equals the value {@code main} passes to the constructor. */
    private static final boolean CASE_SENSITIVE = false;
    /** Confusion-pair factors that {@code main} hands to {@code run}; one result is produced per factor. */
    private static final List<Long> EVAL_FACTORS = Arrays.asList(10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L);
    /** Sentence cap for command-line runs; equals the limit {@code main} passes to {@code run}. */
    private static final int MAX_SENTENCES = 1000;
    /** Language used to analyze sentences and to read plain-text sentence sources. */
    private final Language language;
    /** When {@code false}, tokens and sentence text are lower-cased before sentences are selected. */
    private final boolean caseSensitive;
    /** When {@code true}, the confusion pair is also evaluated with the two tokens swapped. */
    private final boolean bothDirections;
    /** The language's rule with id {@code CONFUSION_RULE}, resolved once in the constructor. */
    private final ConfusionProbabilityRule rule;
    /** Per-factor counters (true/false positives and negatives), keyed by factor. */
    private final Map<Long, RuleEvalValues> evalValues = new HashMap<Long, RuleEvalValues>();
    /** Whether progress and detail messages are written to standard output; on by default. */
    private boolean verbose = true;

    /**
     * Creates an evaluator bound to the {@code CONFUSION_RULE} of the given language.
     *
     * @param language language whose language-model rules are searched for the confusion rule
     * @param languageModel language model handed to the language when it builds those rules
     * @param caseSensitive if {@code false}, tokens and sentences are lower-cased before sentence selection
     * @param bothDirections if {@code true}, the pair is also evaluated with the two tokens swapped
     * @throws RuntimeException if the language returns no language-model rules, if none of them has
     *         the id {@code CONFUSION_RULE}, or if loading the rules fails with an {@link IOException}
     */
    ConfusionRuleEvaluator(Language language, LanguageModel languageModel, boolean caseSensitive, boolean bothDirections) {
        this.language = language;
        this.caseSensitive = caseSensitive;
        this.bothDirections = bothDirections;
        try {
            // Typed list: a raw List yields Object elements and cannot be iterated as Rule.
            List<Rule> rules = language.getRelevantLanguageModelRules(JLanguageTool.getMessageBundle(), languageModel, null);
            if (rules == null) {
                throw new RuntimeException("Language " + language + " doesn't seem to support a language model");
            }
            ConfusionProbabilityRule foundRule = null;
            // Loop variable deliberately not called "rule" so it does not shadow the field.
            for (Rule candidate : rules) {
                if ("CONFUSION_RULE".equals(candidate.getId())) {
                    foundRule = (ConfusionProbabilityRule) candidate;
                    break;
                }
            }
            if (foundRule == null) {
                throw new RuntimeException("Language " + language + " has no language model rule with id CONFUSION_RULE");
            }
            this.rule = foundRule;
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Switches progress and per-factor detail output on or off.
     * The results header printed by {@code printEvalResult} is written regardless of this setting.
     *
     * @param verbose {@code true} to print progress and details to standard output
     */
    void setVerboseMode(boolean verbose) {
        this.verbose = verbose;
    }

    /**
     * Evaluates the confusion rule for the pair {@code token}/{@code homophoneToken} at every given factor.
     *
     * <p>Sentences containing each of the two words are loaded from the inputs. Sentences containing
     * {@code token} are evaluated as correct text, sentences containing {@code homophoneToken} are
     * evaluated as incorrect text (the word is swapped for {@code token} before checking). When the
     * evaluator works in both directions, each sentence set is additionally evaluated with the roles
     * of the two words reversed.
     *
     * @param inputsOrDir input files, or directories containing {@code <word>.txt} example files
     * @param token first word of the pair
     * @param homophoneToken second word of the pair
     * @param maxSentences cap on the number of sentences loaded per word
     * @param evalFactors factors to evaluate; one result entry is produced per factor
     * @param sourcesForToken only appended to the summary line of each result
     * @param sourcesForHomophone only appended to the summary line of each result
     * @return results keyed by factor, in ascending factor order
     * @throws IOException if reading the inputs or analyzing a sentence fails
     */
    Map<Long, RuleEvalResult> run(List<String> inputsOrDir, String token, String homophoneToken, int maxSentences, List<Long> evalFactors, Map<String, Integer> sourcesForToken, Map<String, Integer> sourcesForHomophone) throws IOException {
        // Start from a clean slate: without this, factors from an earlier run() with a different
        // factor list would stay in the map and show up (with stale counts) in this run's results.
        this.evalValues.clear();
        for (Long evalFactor : evalFactors) {
            this.evalValues.put(evalFactor, new RuleEvalValues());
        }
        List<Sentence> allTokenSentences = this.getRelevantSentences(inputsOrDir, token, maxSentences);
        List<Sentence> allHomophoneSentences = this.getRelevantSentences(inputsOrDir, homophoneToken, maxSentences);
        this.evaluate(allTokenSentences, true, token, homophoneToken, evalFactors);
        if (this.bothDirections) {
            this.evaluate(allTokenSentences, false, homophoneToken, token, evalFactors);
        }
        this.evaluate(allHomophoneSentences, false, token, homophoneToken, evalFactors);
        if (this.bothDirections) {
            this.evaluate(allHomophoneSentences, true, homophoneToken, token, evalFactors);
        }
        return this.printEvalResult(allTokenSentences, allHomophoneSentences, inputsOrDir, token, homophoneToken, sourcesForToken, sourcesForHomophone);
    }

    /**
     * Runs the confusion rule over the given sentences for every factor and updates the per-factor counters.
     *
     * <p>For correct sentences the text is checked unchanged; a rule match counts as a false positive,
     * no match as a true negative. For incorrect sentences the first whole-word, case-insensitive
     * occurrence of {@code homophoneToken} is replaced by {@code token} before checking; a rule match
     * counts as a true positive, no match as a false negative.
     *
     * @param sentences sentences to check
     * @param isCorrect whether the sentences are to be treated as correct text
     * @param token word the rule should accept; also the word inserted into incorrect sentences
     * @param homophoneToken the other word of the pair; the word that gets replaced in incorrect sentences
     * @param evalFactors factors to evaluate; each must already have an entry in {@code evalValues}
     * @throws IOException if sentence analysis or rule matching fails
     */
    private void evaluate(List<Sentence> sentences, boolean isCorrect, String token, String homophoneToken, List<Long> evalFactors) throws IOException {
        this.println("======================");
        this.printf("Starting evaluation on " + sentences.size() + " " + (isCorrect ? "correct" : "incorrect") + " sentences with %s/%s:\n", token, homophoneToken);
        JLanguageTool lt = new JLanguageTool(this.language);
        // lt is only used for sentence analysis below; the confusion rule is invoked directly.
        List<Rule> allActiveRules = lt.getAllActiveRules();
        for (Rule activeRule : allActiveRules) {
            lt.disableRule(activeRule.getId());
        }
        // Loop-invariant: the word looked for in the sentence text and its whole-word pattern.
        // The word is quoted so that regex metacharacters in it are matched literally.
        String textToken = isCorrect ? token : homophoneToken;
        Pattern textTokenPattern = Pattern.compile("(?i)\\b" + Pattern.quote(textToken) + "\\b");
        for (Sentence sentence : sentences) {
            String plainText = sentence.getText();
            // NOTE(review): this start-of-sentence check is case-sensitive, so a sentence starting with
            // the capitalized word does NOT get a capitalized replacement - confirm this is intended.
            String replacement = plainText.startsWith(textToken) ? StringTools.uppercaseFirstChar(token) : token;
            // quoteReplacement keeps '$' and '\' in the replacement from being read as group references.
            String replacedTokenSentence = isCorrect
                    ? plainText
                    : textTokenPattern.matcher(plainText).replaceFirst(Matcher.quoteReplacement(replacement));
            AnalyzedSentence analyzedSentence = lt.getAnalyzedSentence(replacedTokenSentence);
            for (Long factor : evalFactors) {
                this.rule.setConfusionPair(new ConfusionPair(token, homophoneToken, factor, this.bothDirections));
                RuleMatch[] matches = this.rule.match(analyzedSentence);
                boolean consideredCorrect = matches.length == 0;
                RuleEvalValues counters = this.evalValues.get(factor);
                if (isCorrect) {
                    if (consideredCorrect) {
                        ++counters.trueNegatives;
                    } else {
                        ++counters.falsePositives;
                        // Built only here: the highlighted sentence is needed for this message alone.
                        String displayStr = textTokenPattern.matcher(plainText).replaceFirst(Matcher.quoteReplacement("**" + replacement + "**"));
                        this.println("false positive with factor " + factor + ": " + displayStr);
                    }
                } else if (consideredCorrect) {
                    ++counters.falseNegatives;
                } else {
                    ++counters.truePositives;
                }
            }
        }
    }

    /**
     * Turns the collected counters into one result per factor and prints them.
     *
     * <p>The header (pair, sentence count, inputs, case sensitivity) is always printed; the per-factor
     * counter line and summary line are printed only in verbose mode. Precision, recall and specificity
     * are float ratios and are {@code NaN} when their denominator is zero.
     *
     * @param allTokenSentences sentences that were loaded for {@code token}; only their count is used
     * @param allHomophoneSentences sentences that were loaded for {@code homophoneToken}; only their count is used
     * @param inputsOrDir inputs, printed in the header
     * @param token first word of the pair
     * @param homophoneToken second word of the pair
     * @param sourcesForToken appended to each summary line
     * @param sourcesForHomophone appended to each summary line
     * @return results keyed by factor, in ascending factor order
     */
    private Map<Long, RuleEvalResult> printEvalResult(List<Sentence> allTokenSentences, List<Sentence> allHomophoneSentences, List<String> inputsOrDir, String token, String homophoneToken, Map<String, Integer> sourcesForToken, Map<String, Integer> sourcesForHomophone) {
        Map<Long, RuleEvalResult> results = new LinkedHashMap<>();
        int sentences = allTokenSentences.size() + allHomophoneSentences.size();
        System.out.println("\nEvaluation results for " + token + "/" + homophoneToken + " with " + sentences + " sentences as of " + new Date() + ":");
        System.out.printf(Locale.ENGLISH, "Inputs:       %s\n", inputsOrDir);
        System.out.printf(Locale.ENGLISH, "Case sensit.: %s\n", this.caseSensitive);

        // Everything below up to the loop is the same for every factor, so compute it once.
        String date = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        String word1 = token;
        String word2 = homophoneToken;
        String delimiter = " -> ";
        if (this.bothDirections) {
            // A bidirectional pair is written with "; " and with the two words in natural String order.
            delimiter = "; ";
            if (word1.compareTo(word2) > 0) {
                String temp = word1;
                word1 = word2;
                word2 = temp;
            }
        }
        float fMeasureBeta = 0.5f;

        List<Long> factors = this.evalValues.keySet().stream().sorted().collect(Collectors.toList());
        for (Long factor : factors) {
            RuleEvalValues values = this.evalValues.get(factor);
            float precision = (float) values.truePositives / (float) (values.truePositives + values.falsePositives);
            float specificity = (float) values.trueNegatives / (float) (values.trueNegatives + values.falsePositives);
            float recall = (float) values.truePositives / (float) (values.truePositives + values.falseNegatives);
            // Padding shrinks as the factor gets more digits, so the '#' column lines up across factors.
            String spaces = StringUtils.repeat(" ", 82 - Long.toString(factor).length());
            String summary = String.format(Locale.ENGLISH, "%s%s%s; %d; %s # p=%.3f, r=%.3f, f%.1f=%.3f, s=%.3f, %d+%d, %dgrams, %s, fp=%d, fn=%d, tp=%d, tn=%d, %s, %s", word1, delimiter, word2, factor, spaces, precision, recall, fMeasureBeta, FMeasure.getFMeasure(precision, recall, fMeasureBeta), specificity, allTokenSentences.size(), allHomophoneSentences.size(), this.rule.getNGrams(), date, values.falsePositives, values.falseNegatives, values.truePositives, values.trueNegatives, sourcesForToken, sourcesForHomophone);
            results.put(factor, new RuleEvalResult(summary, precision, recall));
            if (!this.verbose) {
                continue;
            }
            System.out.println();
            System.out.printf(Locale.ENGLISH, "Factor: %d - %d false positives, %d false negatives, %d true positives, %d true negatives\n", factor, values.falsePositives, values.falseNegatives, values.truePositives, values.trueNegatives);
            // The summary is data, not a format string: printing it via printf would fail on any '%'.
            System.out.print(summary + "\n");
        }
        return results;
    }

    /**
     * Loads up to {@code maxSentences} sentences containing {@code token} from the given inputs.
     *
     * <p>A directory input is expected to contain a file named {@code <token>.txt}, which is read as
     * plain text. For any other input, a mixing source over <em>all</em> inputs is created.
     *
     * <p>NOTE(review): the result of each loop iteration replaces the previous one, so with several
     * inputs only the sentences of the last iteration are returned, and every non-directory input
     * triggers another pass over all inputs. Kept as is; confirm whether that is intended.
     *
     * @param inputs input files or directories
     * @param token word the sentences must contain
     * @param maxSentences cap on the number of sentences collected
     * @return the matching sentences; empty if {@code inputs} is empty
     * @throws IOException if opening, reading or closing an input fails
     * @throws RuntimeException if a directory input has no {@code <token>.txt} file
     */
    private List<Sentence> getRelevantSentences(List<String> inputs, String token, int maxSentences) throws IOException {
        List<Sentence> sentences = new ArrayList<>();
        for (String input : inputs) {
            if (new File(input).isDirectory()) {
                File file = new File(input, token + ".txt");
                if (!file.exists()) {
                    throw new RuntimeException("File with example sentences not found: " + file);
                }
                // try-with-resources closes the stream and, unlike a 'continue' inside 'finally',
                // lets an exception from reading the file propagate to the caller.
                try (InputStream fis = new FileInputStream(file)) {
                    SentenceSource sentenceSource = new PlainTextSentenceSource(fis, this.language);
                    sentences = this.getSentencesFromSource(inputs, token, maxSentences, sentenceSource);
                }
            } else {
                SentenceSource sentenceSource = MixingSentenceSource.create(inputs, this.language);
                sentences = this.getSentencesFromSource(inputs, token, maxSentences, sentenceSource);
            }
        }
        return sentences;
    }

    /**
     * Collects sentences from {@code sentenceSource} that contain {@code token} as a whole word.
     *
     * <p>Unless the evaluator is case-sensitive, both the token and the sentence text are lower-cased
     * before matching. Reading stops once {@code maxSentences} sentences have been collected (the
     * check runs after adding, so at least one matching sentence is kept even for a limit below 1).
     *
     * @param inputs only used in progress messages
     * @param token word to look for; matched literally
     * @param maxSentences cap on the number of sentences collected
     * @param sentenceSource source to read sentences from
     * @return the matching sentences in the order they were read
     */
    private List<Sentence> getSentencesFromSource(List<String> inputs, String token, int maxSentences, SentenceSource sentenceSource) {
        List<Sentence> sentences = new ArrayList<>();
        String needle = this.caseSensitive ? token : token.toLowerCase();
        // Quote the word so that regex metacharacters in it (e.g. '.') are matched literally.
        Pattern pattern = Pattern.compile(".*\\b" + Pattern.quote(needle) + "\\b.*");
        while (sentenceSource.hasNext()) {
            Sentence sentence = sentenceSource.next();
            String sentenceText = this.caseSensitive ? sentence.getText() : sentence.getText().toLowerCase();
            if (!pattern.matcher(sentenceText).matches()) {
                continue;
            }
            sentences.add(sentence);
            if (sentences.size() % 250 == 0) {
                this.println("Loaded sentence " + sentences.size() + " with '" + token + "' from " + inputs);
            }
            if (sentences.size() >= maxSentences) {
                break;
            }
        }
        this.println("Loaded " + sentences.size() + " sentences with '" + token + "' from " + inputs);
        return sentences;
    }

    /**
     * Writes {@code msg} followed by a line break to standard output; does nothing unless verbose.
     *
     * @param msg text to print
     */
    private void println(String msg) {
        if (!this.verbose) {
            return;
        }
        System.out.println(msg);
    }

    /**
     * Writes a formatted message to standard output; does nothing unless verbose.
     *
     * @param msg format string as understood by {@link java.io.PrintStream#printf(String, Object...)}
     * @param args values for the format specifiers in {@code msg}
     */
    private void printf(String msg, String ... args) {
        if (!this.verbose) {
            return;
        }
        // The String[] is handed over as the varargs array itself, one element per specifier.
        System.out.printf(msg, (Object[]) args);
    }

    /**
     * Command-line entry point: evaluates one confusion pair against one or two inputs.
     *
     * <p>Expected arguments: {@code <token> <homophoneToken> <langCode> <languageModelTopDir>}
     * followed by one or two inputs. With any other argument count the usage text is printed to
     * standard error and the JVM exits with status 1. For language code {@code en} the
     * {@link EnglishLight} variant is used instead of the registered language.
     *
     * @param args command-line arguments as described above
     * @throws IOException if reading the language model or the inputs fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 5 || args.length > 6) {
            System.err.println("Usage: " + ConfusionRuleEvaluator.class.getSimpleName() + " <token> <homophoneToken> <langCode> <languageModelTopDir> <wikipediaXml|tatoebaFile|plainTextFile|dir>...");
            System.err.println("   <languageModelTopDir> is a directory with sub-directories like 'en' which then again contain '1grams',");
            System.err.println("                      '2grams', and '3grams' sub directories with Lucene indexes");
            System.err.println("                      See https://dev.languagetool.org/finding-errors-using-n-gram-data");
            System.err.println("   <wikipediaXml|tatoebaFile|plainTextFile|dir> either a Wikipedia XML dump, or a Tatoeba file, or");
            System.err.println("                      a plain text file with one sentence per line, or a directory with");
            System.err.println("                      example sentences (where <word>.txt contains only the sentences for <word>).");
            System.err.println("                      You can specify both a Wikipedia file and a Tatoeba file.");
            System.exit(1);
        }
        long startTime = System.currentTimeMillis();
        String token = args[0];
        String homophoneToken = args[1];
        String langCode = args[2];
        // Must be typed as Language (not Object) so that getShortCode() resolves.
        Language lang = "en".equals(langCode) ? new EnglishLight() : Languages.getLanguageForShortCode(langCode);
        LuceneLanguageModel languageModel = new LuceneLanguageModel(new File(args[3], lang.getShortCode()));
        // args[4] and, if present, args[5] are the inputs (the length check above allows 5 or 6 arguments).
        List<String> inputsFiles = new ArrayList<>(Arrays.asList(args).subList(4, args.length));
        boolean bothDirections = true;
        System.out.println("NOTE: assuming pair works in both directions (A -> B and B -> A)");
        ConfusionRuleEvaluator generator = new ConfusionRuleEvaluator(lang, languageModel, CASE_SENSITIVE, bothDirections);
        generator.run(inputsFiles, token, homophoneToken, MAX_SENTENCES, EVAL_FACTORS, Collections.emptyMap(), Collections.emptyMap());
        long endTime = System.currentTimeMillis();
        System.out.println("\nTime: " + (endTime - startTime) + "ms");
    }

    static class EnglishLight
    extends English {
        private DemoTagger tagger;

        EnglishLight() {
        }

        public String getName() {
            return "English Light";
        }

        @NotNull
        public Tagger createDefaultTagger() {
            return new DemoTagger();
        }

        public Chunker createDefaultChunker() {
            return null;
        }
    }
}

