/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev;

import com.google.common.base.Charsets;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import morfologik.fsa.FSA;
import org.languagetool.JLanguageTool;
import org.languagetool.tools.StringTools;

/**
 * Command-line tool that prints a sorted, lowercased list of noun entries
 * to standard output, preceded by a commented header.
 *
 * <p>Words are collected from two sources: the binary dictionary loaded via
 * the {@link JLanguageTool} data broker ({@link #DICT_FILENAME}) and the
 * plain-text additions file ({@link #ADDED_DICT_FILENAME}). An entry is kept
 * only if {@link #isRelevantNoun(String)} accepts it.
 */
public class ExportGermanNouns {
    /** Resource path of the binary dictionary, resolved by the data broker. */
    private static final String DICT_FILENAME = "/de/german.dict";
    /**
     * Path of the tab-separated additions file. It is a relative path, so it is
     * resolved against the current working directory of the process.
     */
    private static final String ADDED_DICT_FILENAME = "languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/added.txt";

    /** Not instantiable from outside; the tool is driven through {@link #main(String[])}. */
    private ExportGermanNouns() {
    }

    /**
     * Collects the words of both sources and returns them in natural
     * {@link String} order.
     *
     * @return sorted list of lowercased words without duplicates
     * @throws IOException if either source cannot be read
     */
    private List<String> getSortedWords() throws IOException {
        // Merge into one set first: a word present in both sources must be
        // exported only once.
        Set<String> merged = new HashSet<>(this.getBinaryDictWords());
        merged.addAll(this.getAddedDictWords());
        List<String> sortedWords = new ArrayList<>(merged);
        Collections.sort(sortedWords);
        return sortedWords;
    }

    /**
     * Reads the binary dictionary and extracts the relevant entries.
     *
     * <p>Each entry is decoded as UTF-8; for relevant entries the text before
     * the first {@code '_'} is taken as the word.
     *
     * @return lowercased words of all relevant entries
     * @throws IOException if the dictionary cannot be read
     */
    private Set<String> getBinaryDictWords() throws IOException {
        Set<String> words = new HashSet<>();
        // try-with-resources: the dictionary stream was previously never closed.
        // Iteration stays inside the block so the stream is open for as long as
        // the automaton might need it.
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(DICT_FILENAME)) {
            FSA fsa = FSA.read(stream);
            for (ByteBuffer buffer : fsa) {
                // Copy the bytes out of the buffer before decoding.
                byte[] sequence = new byte[buffer.remaining()];
                buffer.get(sequence);
                String entry = new String(sequence, StandardCharsets.UTF_8);
                if (this.isRelevantNoun(entry)) {
                    String term = entry.split("_")[0];
                    // Locale.ROOT keeps the result independent of the JVM's
                    // default locale (e.g. Turkish dotless-i mapping).
                    words.add(term.toLowerCase(Locale.ROOT));
                }
            }
        }
        return words;
    }

    /**
     * Reads the additions file and extracts the relevant entries.
     *
     * <p>The file is read as UTF-8; for relevant lines the text before the
     * first tab character is taken as the word.
     *
     * @return lowercased words of all relevant lines
     * @throws IOException if the file cannot be read
     */
    private Set<String> getAddedDictWords() throws IOException {
        Set<String> words = new HashSet<>();
        List<String> lines = Files.readAllLines(FileSystems.getDefault().getPath(ADDED_DICT_FILENAME), StandardCharsets.UTF_8);
        for (String line : lines) {
            if (this.isRelevantNoun(line)) {
                String term = line.split("\t")[0];
                // Locale-independent lowercasing, same as for the binary dictionary.
                words.add(term.toLowerCase(Locale.ROOT));
            }
        }
        return words;
    }

    /**
     * Decides whether a raw dictionary entry should be exported.
     *
     * <p>An entry is accepted when it contains {@code "SUB:"}, or contains both
     * {@code "EIG:"} and {@code "COU"}; it is rejected when it contains
     * {@code ":ADJ"} or when {@link StringTools#isAllUppercase(String)} is true
     * for the whole entry.
     * NOTE(review): the tags presumably mark nouns, proper nouns, countries and
     * adjectives in the German tagset - confirm against the tagset documentation.
     *
     * @param output the complete entry text (word plus tag information)
     * @return {@code true} if the entry qualifies for export
     */
    private boolean isRelevantNoun(String output) {
        // Parentheses make the existing precedence explicit: SUB || (EIG && COU).
        boolean isNoun = output.contains("SUB:") || (output.contains("EIG:") && output.contains("COU"));
        return isNoun && !output.contains(":ADJ") && !StringTools.isAllUppercase(output);
    }

    /**
     * Prints the export header followed by one word per line to standard output.
     *
     * @param args ignored
     * @throws IOException if one of the dictionary sources cannot be read
     */
    public static void main(String[] args) throws IOException {
        ExportGermanNouns prg = new ExportGermanNouns();
        List<String> words = prg.getSortedWords();
        System.out.println("# DO NOT MODIFY - automatically exported");
        System.out.println("# Exporting class: " + ExportGermanNouns.class.getName());
        System.out.println("# Export date: " + new Date());
        System.out.println("# LanguageTool: " + JLanguageTool.VERSION + " (" + JLanguageTool.BUILD_DATE + ")");
        System.out.println("# Potential German compound parts.");
        System.out.println("# Data from Morphy (https://danielnaber.de/download/wklassen.pdf)");
        System.out.println("# with extensions by LanguageTool (https://languagetool.org)");
        System.out.println("# License: Creative Commons Attribution-Share Alike 4.0, http://creativecommons.org/licenses/by-sa/4.0/");
        for (String word : words) {
            System.out.println(word);
        }
    }
}

