/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.disambiguation.uk;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.language.Ukrainian;
import org.languagetool.rules.uk.CaseGovernmentHelper;
import org.languagetool.rules.uk.LemmaHelper;
import org.languagetool.tagging.disambiguation.AbstractDisambiguator;
import org.languagetool.tagging.disambiguation.Disambiguator;
import org.languagetool.tagging.disambiguation.rules.XmlRuleDisambiguator;
import org.languagetool.tagging.disambiguation.uk.SimpleDisambiguator;
import org.languagetool.tagging.disambiguation.uk.UkrainianMultiwordChunker;
import org.languagetool.tagging.uk.PosTagHelper;
import org.languagetool.tools.StringTools;

public class UkrainianHybridDisambiguator
extends AbstractDisambiguator {
    /** POS-tag fragment that marks a last-name reading. */
    private static final String LAST_NAME_TAG = ":lname";
    /** One upper-case Cyrillic letter followed by a period. */
    private static final Pattern INITIAL_REGEX = Pattern.compile("[\u0410-\u042f\u0406\u0407\u0404\u0490]\\.");
    /** POS tags of inanimate nouns carrying the v_kly case marker. */
    private static final Pattern INANIM_VKLY = Pattern.compile("noun:inanim:.:v_kly.*");
    /** POS tags of plural animate nouns tagged as first names. */
    private static final Pattern PLURAL_NAME = Pattern.compile("noun:anim:p:.*:fname.*");
    /** Regex (kept as a string) for plural animate nouns tagged as last names or patronymics. */
    private static final String PLURAL_LNAME = "noun:anim:p:.*:[lp]name.*";
    /** The abbreviation handled by {@code disambiguateSt}. */
    private static final String ST_ABBR = "\u0441\u0442.";
    /** Roman-numeral-like token (Latin X/I/V or their Cyrillic look-alikes), optionally a dash-separated range. */
    private static final Pattern LATIN_DIGITS_PATTERN = Pattern.compile("[XIV\u0425\u0406]+([\u2013\u2014-][XIV\u0425\u0406]+)?");
    /** Arabic number, optionally a dash-separated range. */
    private static final Pattern DIGITS_PATTERN = Pattern.compile("[0-9]+([\u2013\u2014-][0-9]+)?");
    /** Either one fixed lower-case word or a capitalized Cyrillic word. */
    private static final Pattern STATION_NAME_PATTERN = Pattern.compile("\u043c\u0435\u0442\u0440\u043e|[\u0410-\u042f][\u0430-\u044f\u0456\u0457\u0454\u0491'-]+");
    private final Disambiguator chunker = new UkrainianMultiwordChunker("/uk/multiwords.txt", true);
    private final Disambiguator disambiguator = new XmlRuleDisambiguator((Language)new Ukrainian());
    private final SimpleDisambiguator simpleDisambiguator = new SimpleDisambiguator();
    /**
     * Keys of the case-government map whose value set contains {@code "v_mis"}.
     * Collected into an explicit {@link HashSet} because the static initializer adds
     * further entries and {@code Collectors.toSet()} does not guarantee a mutable result.
     */
    static final Set<String> V_MIS_PREPS = CaseGovernmentHelper.CASE_GOVERNMENT_MAP.entrySet().stream()
            .filter(e -> ((Set<?>)e.getValue()).contains("v_mis"))
            .map(Map.Entry::getKey)
            .collect(Collectors.toCollection(HashSet::new));
    /** Keys of the case-government map whose value set does not contain {@code "v_mis"}. */
    static final Set<String> V_NON_MIS_PREPS = CaseGovernmentHelper.CASE_GOVERNMENT_MAP.entrySet().stream()
            .filter(e -> !((Set<?>)e.getValue()).contains("v_mis"))
            .map(Map.Entry::getKey)
            .collect(Collectors.toSet());
    // The three patterns below are assigned in the static initializer of this class.
    private static final Pattern PUNCT_AFTER_KLY_PATTERN;
    private static final Pattern ADJ_V_KLY_PATTERN;
    private static final Pattern PREP_PATTERN;

    /**
     * Disambiguates a sentence in three stages: the in-place pre-disambiguation
     * of this class, then the multiword chunker, then the XML rule disambiguator.
     *
     * @param input the sentence to process; it is modified in place by the first stage
     * @return the sentence returned by the XML rule disambiguator
     * @throws IOException if one of the delegate disambiguators fails
     */
    public final AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
        preDisambiguate(input);
        AnalyzedSentence chunked = chunker.disambiguate(input);
        return disambiguator.disambiguate(chunked);
    }

    /**
     * Applies the hand-written disambiguation passes of this class to the sentence.
     * The passes run in a fixed order and each one mutates the token readings in place.
     *
     * @param input the sentence whose readings are pruned or retagged
     * @return the same {@code input} instance
     */
    public AnalyzedSentence preDisambiguate(AnalyzedSentence input) {
        // Order matters: later passes see the readings left by earlier ones.
        removeVmis(input);
        retagInitials(input);
        retagUnknownInitials(input);
        removeInanimVKly(input);
        removePluralForNames(input);
        removeLowerCaseHomonymsForAbbreviations(input);
        removeLowerCaseBadForUpperCaseGood(input);
        simpleDisambiguator.removeRareForms(input);
        disambiguateSt(input);
        return input;
    }

    /** Lower-cased token that begins with a Cyrillic or Latin letter or a digit; compiled once for removeVmis. */
    private static final Pattern VMIS_WORD_START_PATTERN = Pattern.compile("[\u0430-\u044f\u0456\u0457\u0454\u0491a-z0-9].*");

    /**
     * Removes {@code v_mis} readings from tokens that also have another usable reading,
     * scanning from the second non-whitespace token onward. The scan stops for the whole
     * sentence as soon as a token tagged {@code prep} is found in {@link #V_MIS_PREPS}.
     *
     * <p>Until the first preposition or the first word that is not entirely lower-case
     * has been seen, entirely lower-case words are skipped without pruning.
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void removeVmis(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        boolean startCheck = false;
        for (int i = 1; i < tokens.length; ++i) {
            List<AnalyzedToken> analyzedTokens = tokens[i].getReadings();
            String token = tokens[i].getToken();
            if (token == null) {
                continue;
            }
            String lowerCaseToken = token.toLowerCase();
            boolean hasPrep = PosTagHelper.hasPosTagPart(tokens[i], "prep");
            if (!startCheck) {
                if (hasPrep) {
                    startCheck = true;
                } else if (VMIS_WORD_START_PATTERN.matcher(lowerCaseToken).matches()) {
                    if (StringUtils.isAllLowerCase(token)) {
                        continue;
                    }
                    startCheck = true;
                }
            }
            // V_MIS_PREPS also receives an upper-case entry in the static initializer, which a
            // lower-cased lookup alone can never hit, so the token is checked verbatim as well.
            if (hasPrep && (V_MIS_PREPS.contains(lowerCaseToken) || V_MIS_PREPS.contains(token))) {
                return;
            }
            if (!this.canRemoveVmis(analyzedTokens)) {
                continue;
            }
            for (AnalyzedToken analyzedToken : analyzedTokens) {
                if (PosTagHelper.hasPosTagPart(analyzedToken, "v_mis")) {
                    tokens[i].removeReading(analyzedToken, "dis_v_mis");
                }
            }
        }
    }

    /**
     * Tells whether {@code v_mis} readings may be dropped from a token: it must have at
     * least one {@code v_mis} reading and at least one other reading whose POS tag is
     * non-null and does not end with {@code "_END"}.
     *
     * @param analyzedTokens the readings of a single token
     * @return {@code true} if both kinds of reading are present
     */
    private boolean canRemoveVmis(List<AnalyzedToken> analyzedTokens) {
        boolean sawVmis = false;
        boolean sawOther = false;
        for (AnalyzedToken reading : analyzedTokens) {
            if (PosTagHelper.hasPosTagPart(reading, "v_mis")) {
                sawVmis = true;
            } else {
                String tag = reading.getPOSTag();
                if (tag != null && !tag.endsWith("_END")) {
                    sawOther = true;
                }
            }
            if (sawVmis && sawOther) {
                // Both kinds seen; no need to look at the remaining readings.
                return true;
            }
        }
        return false;
    }

    /** Lemma that starts with an upper-case Cyrillic letter; compiled once instead of per token. */
    private static final Pattern CAPITALIZED_LEMMA_PATTERN = Pattern.compile("[\u0410-\u042f\u0406\u0407\u0404\u0490][\u0430-\u044f\u0456\u0457\u0454\u0491'-].*");
    /** POS tag that ends with the {@code :prop} marker; compiled once instead of per token. */
    private static final Pattern PROP_TAG_PATTERN = Pattern.compile(".*?:prop");

    /**
     * For capitalized tokens with several readings, one of which has a capitalized
     * {@code :prop} lemma, removes readings tagged {@code :bad} whose lemma equals the
     * lower-cased lemma of the token's first reading.
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void removeLowerCaseBadForUpperCaseGood(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; ++i) {
            if (tokens[i].getReadings().size() <= 1
                    || !StringTools.isCapitalizedWord(tokens[i].getToken())
                    || !LemmaHelper.hasLemma(tokens[i], CAPITALIZED_LEMMA_PATTERN, PROP_TAG_PATTERN)) {
                continue;
            }
            String firstLemma = tokens[i].getAnalyzedToken(0).getLemma();
            if (firstLemma == null) {
                // Readings may carry a null lemma; nothing to compare against in that case.
                continue;
            }
            String lowerLemmaToCheck = firstLemma.toLowerCase();
            List<AnalyzedToken> analyzedTokens = tokens[i].getReadings();
            // Walk the snapshot backwards while readings are removed from the token.
            for (int j = analyzedTokens.size() - 1; j >= 0; --j) {
                AnalyzedToken analyzedToken = analyzedTokens.get(j);
                if (PosTagHelper.hasPosTagPart(analyzedToken, ":bad") && lowerLemmaToCheck.equals(analyzedToken.getLemma())) {
                    tokens[i].removeReading(analyzedToken, this.toString());
                }
            }
        }
    }

    /**
     * For all-upper-case tokens that have an {@code :abbr} reading, removes every reading
     * that is neither tagged {@code :abbr} nor the {@code SENT_END} marker.
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void removeLowerCaseHomonymsForAbbreviations(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; ++i) {
            if (!StringUtils.isAllUpperCase(tokens[i].getToken()) || !PosTagHelper.hasPosTagPart(tokens[i], ":abbr")) {
                continue;
            }
            List<AnalyzedToken> analyzedTokens = tokens[i].getReadings();
            // Walk the snapshot backwards while readings are removed from the token.
            for (int j = analyzedTokens.size() - 1; j >= 0; --j) {
                AnalyzedToken analyzedToken = analyzedTokens.get(j);
                // Compare the POS tag, not the token object itself: comparing a String with an
                // AnalyzedToken is always false and would strip the sentence-end marker reading.
                if (PosTagHelper.hasPosTagPart(analyzedToken, ":abbr") || "SENT_END".equals(analyzedToken.getPOSTag())) {
                    continue;
                }
                tokens[i].removeReading(analyzedToken, this.toString());
            }
        }
    }

    /**
     * Removes inanimate-noun {@code v_kly} readings from tokens that also have some other
     * reading, unless {@code likelyVklyContext} accepts the token's surroundings.
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void removeInanimVKly(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; ++i) {
            List<AnalyzedToken> analyzedTokens = tokens[i].getReadings();
            // Reuse the precompiled constant rather than compiling the same regex per token.
            if (!PosTagHelper.hasPosTag(analyzedTokens, INANIM_VKLY) || this.likelyVklyContext(tokens, i)) {
                continue;
            }
            List<AnalyzedToken> inanimVklyReadings = new ArrayList<>();
            boolean otherFound = false;
            for (AnalyzedToken reading : analyzedTokens) {
                String posTag = reading.getPOSTag();
                if (posTag == null) {
                    // An untagged reading ends the scan of this token's readings.
                    break;
                }
                if (posTag.equals("SENT_END")) {
                    continue;
                }
                if (INANIM_VKLY.matcher(posTag).matches()) {
                    inanimVklyReadings.add(reading);
                } else {
                    otherFound = true;
                }
            }
            // Only prune when something else would remain on the token.
            if (inanimVklyReadings.isEmpty() || !otherFound) {
                continue;
            }
            for (AnalyzedToken analyzedToken : inanimVklyReadings) {
                tokens[i].removeReading(analyzedToken, this.toString());
            }
        }
    }

    /**
     * Decides whether the token at {@code i} sits in a context where a {@code v_kly}
     * reading is plausible. All of the following must hold:
     * <ul>
     *   <li>there is a following token and it matches {@code PUNCT_AFTER_KLY_PATTERN};</li>
     *   <li>the previous token is not a preposition, or it is the single-letter token
     *       compared below (case-insensitively);</li>
     *   <li>the previous token has an {@code adj:.:v_kly} reading or is that same
     *       single-letter token.</li>
     * </ul>
     *
     * @param tokens the non-whitespace tokens of the sentence
     * @param i index of the token under inspection; callers pass {@code i >= 1}
     * @return {@code true} if the context looks like a {@code v_kly} usage
     */
    private boolean likelyVklyContext(AnalyzedTokenReadings[] tokens, int i) {
        if (i >= tokens.length - 1) {
            return false;
        }
        AnalyzedTokenReadings previous = tokens[i - 1];
        boolean previousIsO = "\u043e".equalsIgnoreCase(previous.getToken());
        if (!previousIsO && PosTagHelper.hasPosTag(previous, PREP_PATTERN)) {
            return false;
        }
        if (!PUNCT_AFTER_KLY_PATTERN.matcher(tokens[i + 1].getToken()).matches()) {
            return false;
        }
        return PosTagHelper.hasPosTag(previous, ADJ_V_KLY_PATTERN) || previousIsO;
    }

    /**
     * Removes plural first-name readings ({@code PLURAL_NAME}) from tokens that also have
     * another reading, unless the neighbouring tokens suggest a genuine plural
     * (see {@code hasPluralNameContext}).
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void removePluralForNames(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; ++i) {
            List<AnalyzedToken> analyzedTokens = tokens[i].getReadings();
            if (hasPluralNameContext(tokens, i)) {
                continue;
            }
            List<AnalyzedToken> pluralNameReadings = new ArrayList<>();
            boolean otherFound = false;
            for (AnalyzedToken reading : analyzedTokens) {
                String posTag = reading.getPOSTag();
                if (posTag == null) {
                    // An untagged reading ends the scan of this token's readings.
                    break;
                }
                if (posTag.equals("SENT_END")) {
                    continue;
                }
                if (PLURAL_NAME.matcher(posTag).matches()) {
                    pluralNameReadings.add(reading);
                } else {
                    otherFound = true;
                }
            }
            // Only prune when something else would remain on the token.
            if (pluralNameReadings.isEmpty() || !otherFound) {
                continue;
            }
            for (AnalyzedToken analyzedToken : pluralNameReadings) {
                tokens[i].removeReading(analyzedToken, this.toString());
            }
        }
    }

    /**
     * Checks the neighbours of token {@code i} for signs that a plural name reading should stay:
     * a preceding plural adjective, numeral-tagged token or one of four listed lemmas;
     * a following plural last name/patronymic; or last-name readings at both {@code i + 1}
     * and {@code i + 3}.
     */
    private static boolean hasPluralNameContext(AnalyzedTokenReadings[] tokens, int i) {
        if (i > 1
                && (PosTagHelper.hasPosTag(tokens[i - 1], "adj:p:.*")
                    || PosTagHelper.hasPosTag(tokens[i - 1], ".*num.*")
                    || LemmaHelper.hasLemma(tokens[i - 1], Arrays.asList("\u0431\u0430\u0433\u0430\u0442\u043e", "\u043c\u0430\u043b\u043e", "\u043f\u0456\u0432\u0441\u043e\u0442\u043d\u044f", "\u0441\u043e\u0442\u043d\u044f")))) {
            return true;
        }
        if (i < tokens.length - 1 && PosTagHelper.hasPosTag(tokens[i + 1], PLURAL_LNAME)) {
            return true;
        }
        return i < tokens.length - 3
                && PosTagHelper.hasPosTagPart(tokens[i + 1], LAST_NAME_TAG)
                && PosTagHelper.hasPosTagPart(tokens[i + 3], LAST_NAME_TAG);
    }

    /**
     * Scans all tokens (whitespace is skipped) for runs of initials adjacent to a token
     * with a last-name reading and hands each run to {@code checkForInitialRetag}.
     * A last name may come after the initials or before them.
     *
     * @param input the sentence whose token array entries may be replaced
     */
    private void retagInitials(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokens();
        List<Integer> pendingInitials = new ArrayList<Integer>();
        AnalyzedTokenReadings lastName = null;
        for (int idx = 1; idx < tokens.length; idx++) {
            AnalyzedTokenReadings current = tokens[idx];
            if (current.isWhitespace()) {
                continue;
            }
            if (current.hasPartialPosTag(LAST_NAME_TAG)) {
                lastName = current;
                if (!pendingInitials.isEmpty()) {
                    // Initials preceded this last name: retag them and start over.
                    UkrainianHybridDisambiguator.checkForInitialRetag(lastName, pendingInitials, tokens);
                    lastName = null;
                    pendingInitials.clear();
                }
                // Otherwise keep the last name; initials may still follow it.
            } else if (UkrainianHybridDisambiguator.isInitial(tokens, idx)) {
                pendingInitials.add(idx);
            } else {
                // Any other token closes the current run.
                UkrainianHybridDisambiguator.checkForInitialRetag(lastName, pendingInitials, tokens);
                lastName = null;
                pendingInitials.clear();
            }
        }
        // Flush a run that reaches the end of the sentence.
        UkrainianHybridDisambiguator.checkForInitialRetag(lastName, pendingInitials, tokens);
    }

    /**
     * Retags initial-shaped tokens (see {@code isInitial}) that have no reading containing
     * {@code "name"}: all existing readings are removed and a single {@code noninf:abbr}
     * reading with a null lemma is added.
     *
     * @param input the sentence whose readings are replaced in place
     */
    private void retagUnknownInitials(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokens();
        for (int idx = 1; idx < tokens.length; idx++) {
            AnalyzedTokenReadings current = tokens[idx];
            boolean looksLikeInitial = UkrainianHybridDisambiguator.isInitial(tokens, idx);
            if (!looksLikeInitial || PosTagHelper.hasPosTagPart(current, "name")) {
                continue;
            }
            for (AnalyzedToken reading : current.getReadings()) {
                current.removeReading(reading, "dis_unknown_initials");
            }
            AnalyzedToken abbrReading = new AnalyzedToken(current.getToken(), "noninf:abbr", null);
            current.addReading(abbrReading, "dis_unknown_initials");
        }
    }

    /**
     * Replaces the tokens at the collected initial positions with readings derived from
     * the last name. Acts only when a last name is present and exactly one or two
     * initials were collected: the first becomes {@code fname}, the second {@code pname}.
     *
     * @param lastName the token with last-name readings, or {@code null} if none was seen
     * @param initialsIdxs indexes into {@code tokens} of the collected initials
     * @param tokens the sentence token array; entries are overwritten in place
     */
    private static void checkForInitialRetag(AnalyzedTokenReadings lastName, List<Integer> initialsIdxs, AnalyzedTokenReadings[] tokens) {
        int initialCount = initialsIdxs.size();
        if (lastName == null || (initialCount != 1 && initialCount != 2)) {
            return;
        }
        int fnamePos = initialsIdxs.get(0);
        tokens[fnamePos] = UkrainianHybridDisambiguator.getInitialReadings(tokens[fnamePos], lastName, "fname");
        if (initialCount == 2) {
            int pnamePos = initialsIdxs.get(1);
            tokens[pnamePos] = UkrainianHybridDisambiguator.getInitialReadings(tokens[pnamePos], lastName, "pname");
        }
    }

    // Patterns used by disambiguateSt, compiled once instead of on every matching token.
    private static final Pattern ST_NUMBER_AFTER_PATTERN = Pattern.compile("[0-9]+([.,\u2013\u2014-][0-9]+)?");
    private static final Pattern ST_NOUN_F_PATTERN = Pattern.compile("noun:inanim:f:.*");
    private static final Pattern ST_NOUN_P_PATTERN = Pattern.compile("noun:inanim:p:.*");
    private static final Pattern ST_NOUN_N_PATTERN = Pattern.compile("noun:inanim:n:.*");
    private static final Pattern ST_NOUN_NF_PATTERN = Pattern.compile("noun:inanim:[nf]:.*");
    private static final Pattern ST_ADJ_FP_PATTERN = Pattern.compile("adj:[fp]:.*");
    private static final Pattern ST_ADJ_M_PATTERN = Pattern.compile("adj:m:.*");

    /**
     * Narrows the readings of every token equal to {@code ST_ABBR} according to its
     * neighbours. For each such token the first applicable rule wins:
     * <ol>
     *   <li>followed by a number: keep feminine inanimate nouns, or plural ones for both
     *       tokens when the abbreviation is doubled;</li>
     *   <li>followed by one of the listed lemmas/tokens: keep the matching adjective readings;</li>
     *   <li>followed by a token matching {@code STATION_NAME_PATTERN}: keep feminine inanimate nouns;</li>
     *   <li>preceded by a Roman-numeral-like token: keep neuter inanimate nouns (plural if doubled);</li>
     *   <li>preceded by an Arabic number: keep neuter or feminine inanimate nouns (plural if doubled).</li>
     * </ol>
     * Rules 2 to 5 also skip the following token.
     *
     * @param input the sentence whose readings are pruned in place
     */
    private void disambiguateSt(AnalyzedSentence input) {
        AnalyzedTokenReadings[] tokens = input.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; ++i) {
            if (!ST_ABBR.equals(tokens[i].getToken())) {
                continue;
            }
            boolean hasNext = i < tokens.length - 1;
            if (hasNext && ST_NUMBER_AFTER_PATTERN.matcher(tokens[i + 1].getToken()).matches()) {
                Pattern pattern = ST_NOUN_F_PATTERN;
                if (i > 2 && ST_ABBR.equals(tokens[i - 1].getToken())) {
                    // Doubled abbreviation before the number: both tokens become plural.
                    pattern = ST_NOUN_P_PATTERN;
                    UkrainianHybridDisambiguator.remove(tokens[i - 1], pattern);
                }
                UkrainianHybridDisambiguator.remove(tokens[i], pattern);
                continue;
            }
            if (hasNext) {
                AnalyzedTokenReadings next = tokens[i + 1];
                if (LemmaHelper.hasLemma(next, "\u043b\u043e\u0436\u043a\u0430") || next.getToken().equals("\u043b.")) {
                    UkrainianHybridDisambiguator.remove(tokens[i], ST_ADJ_FP_PATTERN);
                    ++i; // skip the token that was just used as context
                    continue;
                }
                if (LemmaHelper.hasLemma(next, Arrays.asList("\u043b\u0435\u0439\u0442\u0435\u043d\u0430\u043d\u0442", "\u0441\u0435\u0440\u0436\u0430\u043d\u0442", "\u0441\u043e\u043b\u0434\u0430\u0442", "\u043d\u0430\u0443\u043a\u043e\u0432\u0438\u0439", "\u043c\u0435\u0434\u0441\u0435\u0441\u0442\u0440\u0430"))) {
                    UkrainianHybridDisambiguator.remove(tokens[i], ST_ADJ_M_PATTERN);
                    ++i;
                    continue;
                }
                if (STATION_NAME_PATTERN.matcher(next.getToken()).matches()) {
                    UkrainianHybridDisambiguator.remove(tokens[i], ST_NOUN_F_PATTERN);
                    ++i;
                    continue;
                }
            }
            if (i <= 1) {
                continue;
            }
            if (LATIN_DIGITS_PATTERN.matcher(tokens[i - 1].getToken()).matches()) {
                Pattern pattern = ST_NOUN_N_PATTERN;
                if (hasNext && ST_ABBR.equals(tokens[i + 1].getToken())) {
                    // Doubled abbreviation after the numeral: both tokens become plural.
                    pattern = ST_NOUN_P_PATTERN;
                    UkrainianHybridDisambiguator.remove(tokens[i + 1], pattern);
                }
                UkrainianHybridDisambiguator.remove(tokens[i], pattern);
                ++i;
                continue;
            }
            if (!DIGITS_PATTERN.matcher(tokens[i - 1].getToken()).matches()) {
                continue;
            }
            Pattern pattern = ST_NOUN_NF_PATTERN;
            if (hasNext && ST_ABBR.equals(tokens[i + 1].getToken())) {
                pattern = ST_NOUN_P_PATTERN;
                UkrainianHybridDisambiguator.remove(tokens[i + 1], pattern);
            }
            UkrainianHybridDisambiguator.remove(tokens[i], pattern);
            ++i;
        }
    }

    /**
     * Keeps only the readings whose POS tag matches {@code pattern}, plus any
     * {@code SENT_END} reading; every other reading is removed from {@code readings}.
     *
     * @param readings the token to prune in place
     * @param pattern POS-tag pattern of the readings to keep
     */
    private static void remove(AnalyzedTokenReadings readings, Pattern pattern) {
        List<AnalyzedToken> snapshot = readings.getReadings();
        // Walk the snapshot from the end while readings are removed from the token.
        int j = snapshot.size();
        while (--j >= 0) {
            AnalyzedToken candidate = snapshot.get(j);
            boolean keep = "SENT_END".equals(candidate.getPOSTag()) || PosTagHelper.hasPosTag(candidate, pattern);
            if (!keep) {
                readings.removeReading(candidate, "UkranianHybridDisambiguator");
            }
        }
    }

    /**
     * Builds replacement readings for an initial from the last-name readings next to it.
     * For every last-name reading, the {@code :lname} fragment of its POS tag is replaced
     * with {@code :<initialType>:abbr}; the initial's own text serves as token and lemma.
     *
     * @param initialsReadings the current token of the initial
     * @param lnameTokens the token carrying last-name readings
     * @param initialType tag fragment to insert; callers pass {@code "fname"} or {@code "pname"}
     * @return a new token at the initial's start position holding the derived readings
     */
    private static AnalyzedTokenReadings getInitialReadings(AnalyzedTokenReadings initialsReadings, AnalyzedTokenReadings lnameTokens, String initialType) {
        List<AnalyzedToken> derived = new ArrayList<AnalyzedToken>();
        String replacementTag = ":" + initialType + ":abbr";
        for (AnalyzedToken lnameReading : lnameTokens.getReadings()) {
            String lnameTag = lnameReading.getPOSTag();
            if (lnameTag != null && lnameTag.contains(LAST_NAME_TAG)) {
                String initialText = initialsReadings.getAnalyzedToken(0).getToken();
                String initialTag = lnameTag.replace(LAST_NAME_TAG, replacementTag);
                AnalyzedToken initialReading = new AnalyzedToken(initialText, initialTag, initialText);
                initialReading.setWhitespaceBefore(initialsReadings.isWhitespaceBefore());
                derived.add(initialReading);
            }
        }
        return new AnalyzedTokenReadings(derived, initialsReadings.getStartPos());
    }

    /**
     * Tells whether the token at {@code pos} has the shape of an initial: it ends with a
     * period and matches {@code INITIAL_REGEX} as a whole.
     *
     * @param tokens the sentence tokens
     * @param pos index of the token to test
     * @return {@code true} if the token text looks like an initial
     */
    private static boolean isInitial(AnalyzedTokenReadings[] tokens, int pos) {
        // The cheap suffix test runs first so the regex is only tried on candidates.
        if (!tokens[pos].getToken().endsWith(".")) {
            return false;
        }
        return INITIAL_REGEX.matcher(tokens[pos].getToken()).matches();
    }

    // Static setup: compiles the remaining patterns and extends V_MIS_PREPS.
    static {
        PREP_PATTERN = Pattern.compile("prep.*");
        ADJ_V_KLY_PATTERN = Pattern.compile("adj:.:v_kly.*");
        PUNCT_AFTER_KLY_PATTERN = Pattern.compile("[!?,\u00bb\"\u201c\u201d\u2026]|[\\.!?]{2,3}");
        // Two ASCII (Latin) entries are added verbatim and case-sensitively.
        // NOTE(review): presumably look-alikes of Cyrillic prepositions - confirm.
        V_MIS_PREPS.addAll(Arrays.asList("y", "B"));
    }
}

