/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.eo;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tagging.ManualTagger;
import org.languagetool.tagging.Tagger;

/**
 * Rule-based part-of-speech tagger for Esperanto.
 *
 * <p>Words found by the {@link ManualTagger} get the lemma/tag pairs listed
 * there. Every other word is tagged from its shape alone: correlatives
 * ("tabelvortoj") via {@code patternTabelvorto}, then the grammatical endings
 * (-o, -a, -e, verb endings), and finally an additional participle reading
 * when the word matches {@code patternParticiple}.
 *
 * <p>The word lists are loaded on the first call to {@link #tag(List)}. No
 * synchronization is performed by this class.
 */
public class EsperantoTagger
implements Tagger {
    /** Lookup of hand-tagged words; also serves as the "initialized" marker for {@link #lazyInit()}. */
    private ManualTagger manualTagger = null;
    /** Infinitives loaded from {@code /eo/verb-tr.txt}. */
    private Set<String> setTransitiveVerbs = null;
    /** Infinitives loaded from {@code /eo/verb-ntr.txt}. */
    private Set<String> setIntransitiveVerbs = null;
    /** Verb stem (2+ characters) followed by a verb ending. */
    private static final Pattern patternVerb = Pattern.compile("(..+)(as|os|is|us|u|i)$");
    /** Prefixes stripped while searching for a verb's transitivity. */
    private static final Pattern patternPrefix = Pattern.compile("^(?:mal|mis|ek|re|fi|ne)(.*)");
    /** Suffixes stripped (from an infinitive) while searching for a verb's transitivity. */
    private static final Pattern patternSuffix = Pattern.compile("(.*)(?:ad|a\u0109|eg|et)i$");
    /**
     * Participle shape. Groups: 1 = word up to and including the "t",
     * 2 = verb stem, 3 = a/i/o vowel, 4 = optional "n", 5 = a/o/e ending,
     * 6 = optional "j", 7 = optional "n".
     */
    private static final Pattern patternParticiple = Pattern.compile("((..+)([aio])(n?)t)([aoe])(j?)(n?)$");
    /** Roots loaded from {@code /eo/root-ant-at.txt}; words built on them get no participle reading. */
    private Set<String> setNonParticiple;
    /**
     * Correlative shape. Groups: 1 = prefix, 2 = u/o/a/e ending, 3 = optional "j",
     * 4 = optional "n", 5 = invariable ending (am/al/es/el/om). Groups 2-4 and
     * group 5 are mutually exclusive alternatives.
     */
    private static final Pattern patternTabelvorto = Pattern.compile("^(i|ti|ki|\u0109i|neni)(?:(?:([uoae])(j?)(n?))|(am|al|es|el|om))$");
    /** Correlatives that additionally receive an "E nak" reading. */
    private static final Pattern patternTabelvortoAdverb = Pattern.compile("^(?:ti|i|\u0109i|neni)(?:am|om|el|e)$");

    /**
     * Reads a UTF-8 word list, one entry per line.
     *
     * <p>Lines are trimmed; empty lines and lines starting with {@code #} are
     * skipped. The stream is closed before returning, also on failure.
     *
     * @param file stream to read from
     * @return the set of entries found in the stream
     * @throws IOException if reading or closing the stream fails
     */
    private Set<String> loadWords(InputStream file) throws IOException {
        Set<String> words = new HashSet<String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(file, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() < 1 || line.charAt(0) == '#') {
                    continue;
                }
                words.add(line);
            }
        } finally {
            if (reader != null) {
                // Closing the outermost reader also closes the wrapped stream.
                reader.close();
            } else if (file != null) {
                file.close();
            }
        }
        return words;
    }

    /**
     * Loads the manual tagger and the word lists on first use.
     *
     * <p>Everything is loaded into locals first and {@code manualTagger} is
     * assigned last: it is the field checked to decide whether initialization
     * already happened, so a failure while loading any resource leaves the
     * object uninitialized (and the next call retries) instead of leaving it
     * with a non-null tagger but null word sets.
     *
     * @throws IOException if any of the resources cannot be read
     */
    private void lazyInit() throws IOException {
        if (this.manualTagger != null) {
            return;
        }
        ManualTagger tagger = new ManualTagger(JLanguageTool.getDataBroker().getFromResourceDirAsStream("/eo/manual-tagger.txt"));
        Set<String> transitiveVerbs = this.loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream("/eo/verb-tr.txt"));
        Set<String> intransitiveVerbs = this.loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream("/eo/verb-ntr.txt"));
        Set<String> nonParticiple = this.loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream("/eo/root-ant-at.txt"));

        this.setTransitiveVerbs = transitiveVerbs;
        this.setIntransitiveVerbs = intransitiveVerbs;
        this.setNonParticiple = nonParticiple;
        this.manualTagger = tagger;
    }

    /**
     * Determines the transitivity code of a verb.
     *
     * <p>Verbs ending in "i\u011di" yield {@code "nt"}; verbs ending in "igi"
     * yield {@code "tr"} (except "memmortigi", which yields {@code "nt"}).
     * Otherwise the verb is looked up in the two verb lists; while it is not
     * found, one prefix ({@code patternPrefix}) or, failing that, one suffix
     * ({@code patternSuffix}) is stripped and the lookup is repeated.
     *
     * @param verb infinitive (ending in "i"), lower case
     * @return {@code "tr"} if only in the transitive list, {@code "nt"} if only
     *         in the intransitive list, {@code "tn"} if in both, {@code "xx"}
     *         if the verb could not be resolved
     */
    private String findTransitivity(String verb) {
        if (verb.endsWith("i\u011di")) {
            return "nt";
        }
        if (verb.endsWith("igi")) {
            return verb.equals("memmortigi") ? "nt" : "tr";
        }
        // Each iteration either returns or strictly shortens the verb, so the loop terminates.
        while (true) {
            boolean isTransitive = this.setTransitiveVerbs.contains(verb);
            boolean isIntransitive = this.setIntransitiveVerbs.contains(verb);
            if (isTransitive) {
                return isIntransitive ? "tn" : "tr";
            }
            if (isIntransitive) {
                return "nt";
            }
            Matcher matcherPrefix = patternPrefix.matcher(verb);
            if (matcherPrefix.find()) {
                verb = matcherPrefix.group(1);
                continue;
            }
            Matcher matcherSuffix = patternSuffix.matcher(verb);
            if (!matcherSuffix.find()) {
                break;
            }
            verb = matcherSuffix.group(1) + "i";
        }
        return "xx";
    }

    /**
     * Tags every token of a sentence.
     *
     * <p>Tokens of length 0 or 1 get a single reading with null tag and lemma.
     * Longer tokens are lower-cased (locale-independently) and looked up in the
     * manual tagger; if absent there, they are tagged from their shape.
     *
     * @param sentenceTokens the tokens to tag
     * @return one {@link AnalyzedTokenReadings} per input token, in input
     *         order, each created with start position 0
     * @throws IOException if the tagger resources cannot be loaded
     */
    public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
        this.lazyInit();
        List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
        for (String word : sentenceTokens) {
            List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
            if (word.length() > 1) {
                // Locale.ROOT: the default locale must not influence matching
                // (e.g. a Turkish locale lower-cases "I" to a dotless i).
                String lWord = word.toLowerCase(Locale.ROOT);
                String[] manualTags = this.manualTagger.lookup(lWord);
                if (manualTags != null) {
                    // The array holds consecutive (lemma, POS tag) pairs.
                    for (int i = 0; i + 1 < manualTags.length; i += 2) {
                        String lemma = manualTags[i];
                        String postag = manualTags[i + 1];
                        l.add(new AnalyzedToken(word, postag, lemma));
                    }
                } else {
                    if (!this.addTabelvortoReadings(word, lWord, l)) {
                        this.addEndingReading(word, lWord, l);
                    }
                    // The participle reading is added on top of whatever was found above.
                    this.addParticipleReading(word, lWord, l);
                }
            } else {
                l.add(new AnalyzedToken(word, null, null));
            }
            tokenReadings.add(new AnalyzedTokenReadings(l, 0));
        }
        return tokenReadings;
    }

    /**
     * Adds the readings for a correlative, if {@code lWord} is one.
     *
     * @param word the token as it appeared in the sentence
     * @param lWord the lower-cased token
     * @param readings list receiving the readings
     * @return {@code true} if {@code lWord} matched {@code patternTabelvorto}
     */
    private boolean addTabelvortoReadings(String word, String lWord, List<AnalyzedToken> readings) {
        Matcher matcher = patternTabelvorto.matcher(lWord);
        if (!matcher.find()) {
            return false;
        }
        // All groups come from the already lower-cased word, so no further case folding is needed.
        String type1Group = matcher.group(1).substring(0, 1);
        String type2Group = matcher.group(2);
        String plGroup = matcher.group(3);
        String accGroup = matcher.group(4);
        String type3Group = matcher.group(5);

        // Groups 3 and 4 are null for the invariable endings (am/al/es/el/om).
        String accusative;
        if (accGroup == null) {
            accusative = "xxx";
        } else {
            accusative = accGroup.equals("n") ? "akz" : "nak";
        }
        String plural;
        if (plGroup == null) {
            plural = " pn ";
        } else {
            plural = plGroup.equals("j") ? " pl " : " np ";
        }
        String type = type2Group == null ? type3Group : type2Group;
        readings.add(new AnalyzedToken(word, "T " + accusative + plural + type1Group + " " + type, null));

        if (patternTabelvortoAdverb.matcher(lWord).find()) {
            readings.add(new AnalyzedToken(word, "E nak", lWord));
        }
        return true;
    }

    /**
     * Adds exactly one reading derived from the grammatical ending of
     * {@code lWord}: "O ..." for -o/-oj/-on/-ojn and elided -', "A ..." for
     * -a/-aj/-an/-ajn, "E ..." for -e/-en, "V ..." for verb endings, or a
     * reading with null tag and lemma when nothing matches.
     *
     * @param word the token as it appeared in the sentence
     * @param lWord the lower-cased token
     * @param readings list receiving the reading
     */
    private void addEndingReading(String word, String lWord, List<AnalyzedToken> readings) {
        int len = lWord.length();
        if (lWord.endsWith("o")) {
            readings.add(new AnalyzedToken(word, "O nak np", lWord));
        } else if (len >= 2 && lWord.endsWith("'")) {
            // Elided final -o: the lemma restores it.
            readings.add(new AnalyzedToken(word, "O nak np", lWord.substring(0, len - 1) + "o"));
        } else if (lWord.endsWith("oj")) {
            readings.add(new AnalyzedToken(word, "O nak pl", lWord.substring(0, len - 1)));
        } else if (lWord.endsWith("on")) {
            readings.add(new AnalyzedToken(word, "O akz np", lWord.substring(0, len - 1)));
        } else if (lWord.endsWith("ojn")) {
            readings.add(new AnalyzedToken(word, "O akz pl", lWord.substring(0, len - 2)));
        } else if (lWord.endsWith("a")) {
            readings.add(new AnalyzedToken(word, "A nak np", lWord));
        } else if (lWord.endsWith("aj")) {
            readings.add(new AnalyzedToken(word, "A nak pl", lWord.substring(0, len - 1)));
        } else if (lWord.endsWith("an")) {
            readings.add(new AnalyzedToken(word, "A akz np", lWord.substring(0, len - 1)));
        } else if (lWord.endsWith("ajn")) {
            readings.add(new AnalyzedToken(word, "A akz pl", lWord.substring(0, len - 2)));
        } else if (lWord.endsWith("e")) {
            readings.add(new AnalyzedToken(word, "E nak", lWord));
        } else if (lWord.endsWith("en")) {
            readings.add(new AnalyzedToken(word, "E akz", lWord.substring(0, len - 1)));
        } else {
            Matcher matcher = patternVerb.matcher(lWord);
            if (matcher.find()) {
                String verb = matcher.group(1) + "i";
                String tense = matcher.group(2);
                String transitive = this.findTransitivity(verb);
                readings.add(new AnalyzedToken(word, "V " + transitive + " " + tense, verb));
            } else {
                readings.add(new AnalyzedToken(word, null, null));
            }
        }
    }

    /**
     * Adds a "C ..." (participle) reading when {@code lWord} has participle
     * shape and its stem-plus-suffix part is not listed in
     * {@code setNonParticiple}.
     *
     * @param word the token as it appeared in the sentence
     * @param lWord the lower-cased token
     * @param readings list receiving the reading
     */
    private void addParticipleReading(String word, String lWord, List<AnalyzedToken> readings) {
        Matcher matcher = patternParticiple.matcher(lWord);
        if (!matcher.find() || this.setNonParticiple.contains(matcher.group(1))) {
            return;
        }
        String verb = matcher.group(2) + "i";
        String aio = matcher.group(3);
        String antAt = matcher.group(4).equals("n") ? "n" : "-";
        String aoe = matcher.group(5);
        String plural = matcher.group(6).equals("j") ? "pl" : "np";
        String accusative = matcher.group(7).equals("n") ? "akz" : "nak";
        String transitive = this.findTransitivity(verb);
        readings.add(new AnalyzedToken(word, "C " + accusative + " " + plural + " " + transitive + " " + aio + " " + antAt + " " + aoe, verb));
    }

    /**
     * Creates readings holding a single token without tag or lemma.
     *
     * @param token the token text
     * @param startPos start position passed on to the readings
     * @return the new readings object
     */
    public AnalyzedTokenReadings createNullToken(String token, int startPos) {
        return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
    }

    /**
     * Creates a token with the given tag and no lemma.
     *
     * @param token the token text
     * @param posTag the part-of-speech tag
     * @return the new token
     */
    public AnalyzedToken createToken(String token, String posTag) {
        return new AnalyzedToken(token, posTag, null);
    }
}

