/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.bigdata;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.dev.bigdata.AutomaticConfusionRuleEvaluator;

/**
 * Command-line filter for the output of {@link AutomaticConfusionRuleEvaluator}.
 *
 * <p>Reads the file given as the only argument, looks at every line starting with
 * {@code =>}, and prints the first line per word pair whose precision is at least
 * {@link #MIN_PRECISION} and whose two occurrence counts are both at least
 * {@link #MIN_OCCURRENCES}. Summary counters are written to {@code System.err}.
 */
final class AutomaticConfusionRuleEvaluatorFilter {
    /** Lines with a precision below this value are rejected. */
    private static final float MIN_PRECISION = 0.99f;
    /** Lines where either occurrence count is below this value are rejected. */
    private static final int MIN_OCCURRENCES = 25;
    /** Padding in {@link #reformat(String)} is {@code PADDING_BASE - markerIndex} spaces. */
    private static final int PADDING_BASE = 52;
    /** Marker that is stripped from the start of every result line. */
    private static final String LINE_PREFIX = "=> ";
    /**
     * Extracts the fields of a result line (with {@link #LINE_PREFIX} already removed):
     * group 1/2 = the two words, group 3 = precision, group 4 = recall,
     * group 5/6 = the two occurrence counts.
     */
    private static final Pattern DATA = Pattern.compile("^(.+?)(?:;| ->) (.+?);.*p=(\\d\\.\\d+), r=(\\d\\.\\d+), f0.5=\\d\\.\\d+, (\\d+)\\+(\\d+),.*");
    /** Matches the leading {@code "=> word1; word2; "} / {@code "=> word1 -> word2; "} part of a line. */
    private static final Pattern PAIR_PREFIX = Pattern.compile("=> .+?(;| ->) .+?; ");

    private AutomaticConfusionRuleEvaluatorFilter() {
    }

    /**
     * Re-pads the gap between the first {@code "0;"} (or, if absent, the first
     * {@code "1;"}) and the first {@code '#'} in {@code s}.
     *
     * <p>Everything up to and including the two-character marker is kept, followed by
     * {@code PADDING_BASE - markerIndex} spaces, followed by the text starting at
     * {@code '#'}. If either the marker or the {@code '#'} is missing or at index 0,
     * the input is returned unchanged.
     *
     * @param s the line to reformat
     * @return the re-padded line, or {@code s} itself if it has no marker/comment pair
     */
    private static String reformat(String s) {
        int markerStart = s.indexOf("0;");
        if (markerStart == -1) {
            markerStart = s.indexOf("1;");
        }
        int commentStart = s.indexOf('#');
        if (markerStart <= 0 || commentStart <= 0) {
            return s;
        }
        String padding = StringUtils.repeat(" ", PADDING_BASE - markerStart);
        return s.substring(0, markerStart + 2) + padding + s.substring(commentStart);
    }

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the evaluator output file (read as UTF-8)
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if a line starting with {@code =>} does not have
     *         the expected format
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.out.println("Usage: " + AutomaticConfusionRuleEvaluatorFilter.class.getSimpleName() + " <file>");
            System.out.println("       <file> is the output of " + AutomaticConfusionRuleEvaluator.class.getName());
            System.exit(0);
        }
        List<String> lines = Files.readAllLines(Paths.get(args[0]), StandardCharsets.UTF_8);
        String prevKey = null;
        int skippedCount = 0;
        int lowPrecisionCount = 0;
        int lowOccurrenceCount = 0;
        int usedCount = 0;
        for (String line : lines) {
            if (!line.startsWith("=>")) {
                continue;
            }
            String content = StringUtils.replaceOnce(line, LINE_PREFIX, "");
            // Cut everything from the first "; <digit>" on, leaving only the word pair.
            String cleanLine = content.replaceFirst("; \\d.*", "");
            // "word1 -> word2" is one-directional, "word1; word2" applies in both directions.
            boolean bothDirections = !cleanLine.contains("->");
            String[] parts = bothDirections ? cleanLine.split(";\\s*") : cleanLine.split("\\s*->\\s*");
            Matcher m = DATA.matcher(content);
            if (parts.length < 2 || !m.find()) {
                // Fail with the offending line instead of an opaque "No match found".
                throw new IllegalArgumentException("Unexpected format in line: " + line);
            }
            String key = parts[0] + ";" + parts[1];
            String word1 = m.group(1);
            String word2 = m.group(2);
            String delim = bothDirections ? "; " : " -> ";
            // Bidirectional pairs are printed in String.compareTo order.
            String wordGroup = bothDirections && word1.compareTo(word2) > 0
                    ? word2 + delim + word1
                    : word1 + delim + word2;
            float precision = Float.parseFloat(m.group(3));
            int occ1 = Integer.parseInt(m.group(5));
            int occ2 = Integer.parseInt(m.group(6));
            if (!key.equals(prevKey)) {
                // Note: a rejected line does NOT update prevKey (the 'continue' skips the
                // assignment below), so the next line with the same key is evaluated again.
                if (precision < MIN_PRECISION) {
                    ++lowPrecisionCount;
                    ++skippedCount;
                    continue;
                }
                if (occ1 < MIN_OCCURRENCES || occ2 < MIN_OCCURRENCES) {
                    ++lowOccurrenceCount;
                    ++skippedCount;
                    continue;
                }
                // quoteReplacement: the words are data and may contain '$' or '\'.
                String replacement = Matcher.quoteReplacement(wordGroup + "; ");
                System.out.println(reformat(PAIR_PREFIX.matcher(line).replaceFirst(replacement)));
                ++usedCount;
            }
            prevKey = key;
        }
        System.err.println("Skipped: " + skippedCount);
        System.err.println("lowPrecisionCount: " + lowPrecisionCount);
        System.err.println("lowOccurrences: " + lowOccurrenceCount);
        System.err.println("Used: " + usedCount);
    }
}

