/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import morfologik.stemming.Dictionary;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.WordData;
import org.languagetool.JLanguageTool;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.tools.StringTools;

/**
 * Developer command-line tool that lists words from the German binary dictionary
 * ({@code /de/german.dict}) which occur both with a lowercase and with an uppercase
 * first character, together with their occurrence counts in an n-gram index.
 *
 * <p>Only dictionary entries whose tag starts with {@code VER:} or {@code SUB:} and does
 * not end with {@code :INF} or {@code :ADJ} are considered, and entries whose word form
 * and base form differ in the case of their first character are skipped.
 *
 * <p>Output: one line per ambiguous word, tab-separated:
 * {@code sum, lowercaseCount, uppercaseCount, uppercaseWord}.
 */
public class GermanCaseAmbiguityFinder {
    /** Default location of the n-gram index; used when no directory is given on the command line. */
    private static final String NGRAMS = "/home/dnaber/data/google-ngram-index/de";

    /**
     * Runs the analysis and prints the result to standard output.
     *
     * @param args optional: {@code args[0]} is the n-gram index directory; if absent,
     *             the built-in default path is used
     * @throws IOException if the dictionary cannot be read
     */
    public static void main(String[] args) throws IOException {
        String ngramDir = args != null && args.length > 0 ? args[0] : NGRAMS;
        // try-with-resources so the index is released even if the analysis fails half-way
        try (LuceneLanguageModel lm = new LuceneLanguageModel(new File(ngramDir))) {
            Dictionary dictionary = Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/de/german.dict"));
            DictionaryLookup dl = new DictionaryLookup(dictionary);
            Map<String, String> lc = new HashMap<>();
            Map<String, String> uc = new HashMap<>();
            System.out.println("Iterating...");
            for (WordData wd : dl) {
                String word = wd.getWord().toString();
                String base = wd.getStem().toString();
                if (hasCaseMismatch(word, base)) {
                    continue;
                }
                // The tag is read only after the case check, as in the original evaluation order.
                String tag = wd.getTag().toString();
                if (!isRelevantTag(tag)) {
                    continue;
                }
                if (StringTools.startsWithUppercase(word)) {
                    uc.put(word, tag);
                } else if (StringTools.startsWithLowercase(word)) {
                    lc.put(word, tag);
                }
                // Words starting with neither an uppercase nor a lowercase character are ignored.
            }
            System.out.println("Done. lc=" + lc.size() + ", uc=" + uc.size());
            for (String ucWord : uc.keySet()) {
                String lcWord = StringTools.lowercaseFirstChar(ucWord);
                if (!lc.containsKey(lcWord)) {
                    continue;
                }
                long lcCount = lm.getCount(lcWord);
                long ucCount = lm.getCount(StringTools.uppercaseFirstChar(ucWord));
                long sum = lcCount + ucCount;
                System.out.println(sum + "\t" + lcCount + "\t" + ucCount + "\t" + ucWord);
            }
        }
    }

    /** Returns true if the word form and its base form differ in the case of their first character. */
    private static boolean hasCaseMismatch(String word, String base) {
        return StringTools.startsWithLowercase(word) && StringTools.startsWithUppercase(base)
                || StringTools.startsWithUppercase(word) && StringTools.startsWithLowercase(base);
    }

    /** Returns true for tags starting with "VER:" or "SUB:" that do not end with ":INF" or ":ADJ". */
    private static boolean isRelevantTag(String tag) {
        if (tag.endsWith(":INF") || tag.endsWith(":ADJ")) {
            return false;
        }
        return tag.startsWith("VER:") || tag.startsWith("SUB:");
    }
}

