/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.bigdata;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.dev.bigdata.RuleEvalResult;
import org.languagetool.dev.bigdata.RuleEvalValues;
import org.languagetool.dev.dumpcheck.MixingSentenceSource;
import org.languagetool.dev.dumpcheck.PlainTextSentenceSource;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.dev.dumpcheck.SentenceSource;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.ConfusionPair;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.ConfusionString;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.de.ProhibitedCompoundRule;

/**
 * Command-line evaluator for the {@code DE_PROHIBITED_COMPOUNDS} language model rule.
 *
 * <p>For every pair of confusable tokens it collects example sentences in which a
 * compound starting or ending with one of the tokens occurs, runs the rule on the
 * sentences as they are (expected: no match) and on variants where the token has been
 * swapped for its counterpart (expected: a match), and reports precision and recall.
 */
class ProhibitedCompoundRuleEvaluator {
    private static final List<Long> EVAL_FACTORS = Arrays.asList(10L);
    private static final int MAX_SENTENCES = 10;
    private static final String RULE_ID = "DE_PROHIBITED_COMPOUNDS";

    private final Language language;
    private final ProhibitedCompoundRule rule;
    /** Per-factor counters; (re)initialised at the start of every {@link #run} call. */
    private final Map<Long, RuleEvalValues> evalValues = new HashMap<>();
    private boolean verbose = true;

    /**
     * Looks up the prohibited-compound rule among the language model rules of the given language.
     *
     * @param language      language whose language model rules are searched
     * @param languageModel language model handed to the rules
     * @throws RuntimeException if the language returns no language model rules, if no rule
     *                          with id {@code DE_PROHIBITED_COMPOUNDS} exists, or if loading
     *                          the rules fails with an {@link IOException} (kept as cause)
     */
    ProhibitedCompoundRuleEvaluator(Language language, LanguageModel languageModel) {
        this.language = language;
        this.rule = findProhibitedCompoundRule(language, languageModel);
    }

    /** Returns the rule with id {@link #RULE_ID}, or throws if it cannot be found. */
    private static ProhibitedCompoundRule findProhibitedCompoundRule(Language language, LanguageModel languageModel) {
        List<Rule> rules;
        try {
            rules = language.getRelevantLanguageModelRules(JLanguageTool.getMessageBundle(), languageModel, null);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (rules == null) {
            throw new RuntimeException("Language " + language + " doesn't seem to support a language model");
        }
        for (Rule candidate : rules) {
            if (RULE_ID.equals(candidate.getId())) {
                return (ProhibitedCompoundRule) candidate;
            }
        }
        throw new RuntimeException("Language " + language + " has no language model rule with id " + RULE_ID);
    }

    /** Enables or disables the progress output printed via the internal print helpers. */
    void setVerboseMode(boolean verbose) {
        this.verbose = verbose;
    }

    /**
     * Evaluates the rule for one token pair.
     *
     * <p>Sentences are loaded separately for {@code token} and {@code homophoneToken}. Each
     * set is evaluated twice: once unmodified (treated as correct text) and once with the
     * token replaced by its counterpart (treated as incorrect text).
     *
     * @param inputsOrDir    input files or directories to take sentences from
     * @param token          first token of the pair
     * @param homophoneToken second token of the pair
     * @param maxSentences   upper bound for the number of sentences loaded per token
     * @param evalFactors    factors for which counters are kept and results are reported
     * @return evaluation result per factor
     * @throws IOException if reading the inputs or running the rule fails
     */
    Map<Long, RuleEvalResult> run(List<String> inputsOrDir, String token, String homophoneToken, int maxSentences, List<Long> evalFactors) throws IOException {
        for (Long evalFactor : evalFactors) {
            this.evalValues.put(evalFactor, new RuleEvalValues());
        }
        List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> allTokenSentences = this.getRelevantSentences(inputsOrDir, token, maxSentences);
        List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> allHomophoneSentences = this.getRelevantSentences(inputsOrDir, homophoneToken, maxSentences);
        this.evaluate(allTokenSentences, true, token, homophoneToken, evalFactors);
        this.evaluate(allTokenSentences, false, homophoneToken, token, evalFactors);
        this.evaluate(allHomophoneSentences, false, token, homophoneToken, evalFactors);
        this.evaluate(allHomophoneSentences, true, homophoneToken, token, evalFactors);
        return this.printRuleEvalResult(allTokenSentences, allHomophoneSentences, inputsOrDir, token, homophoneToken);
    }

    /**
     * Runs the rule on every sentence and updates the per-factor counters.
     *
     * @param sentences      sentences paired with the (start, end) range of the matched token
     * @param isCorrect      {@code true} to check the sentence text unchanged; {@code false} to
     *                       first replace the matched range with {@code token}
     * @param token          token that is written into the sentence when {@code isCorrect} is false
     * @param homophoneToken the other token of the pair
     * @param evalFactors    factors whose counters are updated
     */
    private void evaluate(List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> sentences, boolean isCorrect, String token, String homophoneToken, List<Long> evalFactors) throws IOException {
        this.println("======================");
        this.printf("Starting evaluation on " + sentences.size() + " sentences with %s/%s (%s):\n", token, homophoneToken, String.valueOf(isCorrect));
        JLanguageTool lt = new JLanguageTool(this.language);
        // The JLanguageTool instance is only used for sentence analysis, so all its rules are switched off.
        List<Rule> allActiveRules = lt.getAllActiveRules();
        for (Rule activeRule : allActiveRules) {
            lt.disableRule(activeRule.getId());
        }
        for (Map.Entry<Sentence, Map.Entry<Integer, Integer>> entry : sentences) {
            String plainText = entry.getKey().getText();
            int matchStart = entry.getValue().getKey();
            int matchEnd = entry.getValue().getValue();
            String match = plainText.substring(matchStart, matchEnd);
            // Keep the capitalisation of the text that is being replaced.
            String textToken = Character.isUpperCase(match.charAt(0)) ? StringUtils.capitalize(token) : StringUtils.uncapitalize(token);
            String evaluated = isCorrect
                    ? plainText
                    : plainText.substring(0, matchStart) + textToken + plainText.substring(matchEnd);
            AnalyzedSentence analyzedSentence = lt.getAnalyzedSentence(evaluated);
            for (Long factor : evalFactors) {
                // The pair is (re)applied right before each match call, as in the original flow.
                this.rule.setConfusionPair(new ProhibitedCompoundRule.Pair(homophoneToken, "", token, ""));
                RuleMatch[] matches = this.rule.match(analyzedSentence);
                boolean consideredCorrect = matches.length == 0;
                RuleEvalValues counters = this.evalValues.get(factor);
                if (isCorrect) {
                    if (consideredCorrect) {
                        ++counters.trueNegatives;
                    } else {
                        ++counters.falsePositives;
                    }
                } else if (consideredCorrect) {
                    ++counters.falseNegatives;
                } else {
                    ++counters.truePositives;
                }
            }
        }
    }

    /**
     * Prints the evaluation summary and builds the per-factor results.
     *
     * <p>The header lines are always printed; the per-factor details only in verbose mode.
     * Precision and recall are computed with float division, so they are {@code NaN} when
     * the respective denominator is zero.
     *
     * @return map from factor to its {@link RuleEvalResult}
     */
    private Map<Long, RuleEvalResult> printRuleEvalResult(List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> allTokenSentences, List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> allHomophoneSentences, List<String> inputsOrDir, String token, String homophoneToken) {
        Map<Long, RuleEvalResult> results = new HashMap<>();
        int sentences = allTokenSentences.size() + allHomophoneSentences.size();
        System.out.println("\nEvaluation results for " + token + "/" + homophoneToken + " with " + sentences + " sentences as of " + new Date() + ":");
        System.out.printf(Locale.ENGLISH, "Inputs:       %s\n", inputsOrDir);
        // The summary lists the two tokens in lexicographic order.
        String firstToken = token;
        String secondToken = homophoneToken;
        if (secondToken.compareTo(firstToken) < 0) {
            firstToken = homophoneToken;
            secondToken = token;
        }
        String date = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        List<Long> factors = this.evalValues.keySet().stream().sorted().collect(Collectors.toList());
        for (Long factor : factors) {
            RuleEvalValues values = this.evalValues.get(factor);
            float precision = (float) values.truePositives / (float) (values.truePositives + values.falsePositives);
            float recall = (float) values.truePositives / (float) (values.truePositives + values.falseNegatives);
            String spaces = StringUtils.repeat(" ", 82 - Long.toString(factor).length());
            String summary = String.format(Locale.ENGLISH, "%s; %s; %d; %s # p=%.3f, r=%.3f, %d+%d, %s", firstToken, secondToken, factor, spaces, precision, recall, allTokenSentences.size(), allHomophoneSentences.size(), date);
            results.put(factor, new RuleEvalResult(summary, precision, recall));
            if (!this.verbose) {
                continue;
            }
            System.out.println();
            System.out.printf(Locale.ENGLISH, "Factor: %d - %d false positives, %d false negatives, %d true positives, %d true negatives\n", factor, values.falsePositives, values.falseNegatives, values.truePositives, values.trueNegatives);
            // The summary contains the tokens, so it must be passed as an argument, never as the format string.
            System.out.printf("%s\n", summary);
        }
        return results;
    }

    /**
     * Loads sentences containing a compound with the given token.
     *
     * <p>For an input that is a directory, the file {@code <token>.txt} inside it is read as
     * plain text. For any other input, a mixing source over all {@code inputs} is used.
     *
     * <p>NOTE(review): every input overwrites the result of the previous one, so the list
     * returned is the one produced for the last input. This mirrors the existing behaviour.
     *
     * @throws RuntimeException if an input is a directory without a {@code <token>.txt} file
     * @throws IOException      if reading an input fails
     */
    private List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> getRelevantSentences(List<String> inputs, String token, int maxSentences) throws IOException {
        List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> sentences = new ArrayList<>();
        for (String input : inputs) {
            if (new File(input).isDirectory()) {
                File file = new File(input, token + ".txt");
                if (!file.exists()) {
                    throw new RuntimeException("File with example sentences not found: " + file);
                }
                // try-with-resources closes the stream without hiding exceptions thrown while reading.
                try (FileInputStream fis = new FileInputStream(file)) {
                    SentenceSource sentenceSource = new PlainTextSentenceSource(fis, this.language);
                    sentences = this.getSentencesFromSource(inputs, token, maxSentences, sentenceSource);
                }
            } else {
                SentenceSource sentenceSource = MixingSentenceSource.create(inputs, this.language);
                sentences = this.getSentencesFromSource(inputs, token, maxSentences, sentenceSource);
            }
        }
        return sentences;
    }

    /**
     * Collects sentences whose first regex hit is a capitalised word that starts or ends
     * with {@code token} (case-insensitively) and has at least one more letter.
     *
     * <p>Each sentence is paired with the (start, end) offsets of the token inside it.
     * Reading stops once the list size reaches {@code maxSentences}; the check happens after
     * adding, so at least one sentence is collected if any qualifies.
     *
     * @param inputs only used for the log message
     */
    private List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> getSentencesFromSource(List<String> inputs, String token, int maxSentences, SentenceSource sentenceSource) {
        List<Map.Entry<Sentence, Map.Entry<Integer, Integer>>> sentences = new ArrayList<>();
        // Quote the token so that characters with a regex meaning are matched literally.
        String quotedToken = Pattern.quote(token.toLowerCase(Locale.ROOT));
        Pattern pattern = Pattern.compile("(?iu)\\b(" + quotedToken + ")\\p{Alpha}+\\b|\\b\\p{Alpha}+(" + quotedToken + ")\\b");
        while (sentenceSource.hasNext()) {
            Sentence sentence = sentenceSource.next();
            Matcher matcher = pattern.matcher(sentence.getText());
            if (!matcher.find() || !Character.isUpperCase(matcher.group().charAt(0))) {
                continue;
            }
            // Exactly one of the two groups took part in the match; the other one reports -1.
            int tokenStart = Math.max(matcher.start(1), matcher.start(2));
            int tokenEnd = Math.max(matcher.end(1), matcher.end(2));
            Map.Entry<Integer, Integer> range = new AbstractMap.SimpleEntry<>(tokenStart, tokenEnd);
            sentences.add(new AbstractMap.SimpleEntry<>(sentence, range));
            if (sentences.size() >= maxSentences) {
                break;
            }
        }
        this.println("Loaded " + sentences.size() + " sentences with '" + token + "' from " + inputs);
        return sentences;
    }

    /** Prints the message to standard output when verbose mode is on. */
    private void println(String msg) {
        if (this.verbose) {
            System.out.println(msg);
        }
    }

    /** Prints the formatted message to standard output when verbose mode is on. */
    private void printf(String msg, String ... args) {
        if (this.verbose) {
            System.out.printf(msg, (Object[]) args);
        }
    }

    /**
     * Entry point: evaluates every pair of the given confusion set file.
     *
     * <p>Arguments: {@code <tokens> <langCode> <languageModelTopDir> <input> [<input>]}.
     * Prints a usage message and exits with status 1 on a wrong number of arguments.
     *
     * @throws IOException      if reading the confusion set or the inputs fails
     * @throws RuntimeException if a confusion set entry has fewer than two terms
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4 || args.length > 5) {
            System.err.println("Usage: " + ProhibitedCompoundRuleEvaluator.class.getSimpleName() + " <tokens> <langCode> <languageModelTopDir> <wikipediaXml|tatoebaFile|plainTextFile|dir>...");
            System.err.println("   <tokens> is confusion set file with token/homophone pairs");
            System.err.println("   <languageModelTopDir> is a directory with sub-directories like 'en' which then again contain '1grams',");
            System.err.println("                      '2grams', and '3grams' sub directories with Lucene indexes");
            System.err.println("                      See https://dev.languagetool.org/finding-errors-using-n-gram-data");
            System.err.println("   <wikipediaXml|tatoebaFile|plainTextFile|dir> either a Wikipedia XML dump, or a Tatoeba file, or");
            System.err.println("                      a plain text file with one sentence per line, or a directory with");
            System.err.println("                      example sentences (where <word>.txt contains only the sentences for <word>).");
            System.err.println("                      You can specify both a Wikipedia file and a Tatoeba file.");
            System.exit(1);
        }
        long startTime = System.currentTimeMillis();
        String confusionSetFile = args[0];
        String langCode = args[1];
        Language lang = Languages.getLanguageForShortCode(langCode);
        ConfusionSetLoader loader = new ConfusionSetLoader(lang);
        Map<String, List<ConfusionPair>> confusionSet;
        try (InputStream confusionSetStream = new FileInputStream(confusionSetFile)) {
            confusionSet = loader.loadConfusionPairs(confusionSetStream);
        }
        // NOTE(review): the language model is not closed here; confirm whether it should be.
        LanguageModel languageModel = new LuceneLanguageModel(new File(args[2], lang.getShortCode()));
        List<String> inputsFiles = new ArrayList<>();
        inputsFiles.add(args[3]);
        if (args.length >= 5) {
            inputsFiles.add(args[4]);
        }
        ProhibitedCompoundRuleEvaluator generator = new ProhibitedCompoundRuleEvaluator(lang, languageModel);
        for (List<ConfusionPair> entries : confusionSet.values()) {
            for (ConfusionPair pair : entries) {
                ConfusionString[] words = pair.getTerms().toArray(new ConfusionString[0]);
                if (words.length < 2) {
                    throw new RuntimeException("Invalid confusion set entry: " + pair);
                }
                generator.run(inputsFiles, words[0].getString(), words[1].getString(), MAX_SENTENCES, EVAL_FACTORS);
            }
        }
        long endTime = System.currentTimeMillis();
        System.out.println("\nTime: " + (endTime - startTime) + "ms");
    }
}

