/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.chunking;

import com.google.common.base.Function;
import edu.washington.cs.knowitall.regex.Match;
import edu.washington.cs.knowitall.regex.RegularExpression;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Experimental;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.chunking.ChunkTaggedToken;
import org.languagetool.chunking.Chunker;
import org.languagetool.chunking.TokenExpressionFactory;

@Experimental
public class RussianChunker
implements Chunker {
    // Chunk tag names that doApplyRegex() strips from a token before adding the new tag,
    // but only for rules built with overwrite == true.
    private static final Set<String> FILTER_TAGS = new HashSet<String>(Arrays.asList("PP", "NPP", "NPS", "MayMissingYO", "VP", "SBAR", "ADJP", "DPT"));
    // Factory handed to RegularExpression.compile() in build().
    // NOTE(review): the meaning of the 'false' constructor argument is not visible in this file - confirm.
    private static final TokenExpressionFactory FACTORY = new TokenExpressionFactory(false);
    // Shorthand -> expansion pairs; build() replaces every key found in a rule by its value.
    private static final Map<String, String> SYNTAX_EXPANSION = new HashMap<String, String>();
    // Global debug switch, read via isDebug() and changed via setDebug().
    private static boolean debug;
    // Rules applied, in list order, by getBasicChunks().
    private static final List<RegularExpressionWithPhraseType> REGEXES1;
    // Rules applied, in list order, by addChunkTags() after the basic chunks have been computed.
    private static final List<RegularExpressionWithPhraseType> REGEXES2;

    /**
     * Switches the class-wide debug flag.
     *
     * @param debugMode {@code true} to make the chunker print its input and every
     *                  rule that changed the chunk tags to standard output
     */
    public static void setDebug(boolean debugMode) {
        RussianChunker.debug = debugMode;
    }

    /**
     * Reports the current state of the class-wide debug flag.
     *
     * @return {@code true} if debug output is enabled
     */
    public static boolean isDebug() {
        return RussianChunker.debug;
    }

    /**
     * Convenience overload: builds a rule that does not run in overwrite mode.
     *
     * @param expr       the chunk expression, possibly containing shorthand from SYNTAX_EXPANSION
     * @param phraseType the phrase type assigned to tokens matched by the expression
     * @return the compiled rule with {@code overwrite == false}
     */
    private static RegularExpressionWithPhraseType build(String expr, PhraseType phraseType) {
        final boolean overwrite = false;
        return build(expr, phraseType, overwrite);
    }

    /**
     * Expands the shorthand in {@code expr} and compiles the result into a rule.
     *
     * @param expr       the chunk expression, possibly containing shorthand from SYNTAX_EXPANSION
     * @param phraseType the phrase type assigned to tokens matched by the expression
     * @param overwrite  whether applying the rule first drops existing tags listed in FILTER_TAGS
     * @return the compiled expression bundled with its phrase type and overwrite flag
     */
    private static RegularExpressionWithPhraseType build(String expr, PhraseType phraseType, boolean overwrite) {
        String expanded = expr;
        // Literal (non-regex) replacement of every known shorthand by its expansion.
        for (String shorthand : SYNTAX_EXPANSION.keySet()) {
            expanded = expanded.replace(shorthand, SYNTAX_EXPANSION.get(shorthand));
        }
        RegularExpression compiled = RegularExpression.compile(expanded, (Function)FACTORY);
        return new RegularExpressionWithPhraseType((RegularExpression<ChunkTaggedToken>)compiled, phraseType, overwrite);
    }

    /**
     * Computes chunk tags for the given tokens and stores them on the token readings.
     * <p>
     * The basic chunks (REGEXES1) are computed first, then the REGEXES2 rules are applied
     * in list order, and finally the resulting tags are written back to the readings.
     *
     * @param tokenReadings the tokens to tag; whitespace tokens and tokens already carrying
     *                      the {@code MayMissingYO} chunk tag are skipped by getBasicChunks()
     */
    @Override
    public void addChunkTags(List<AnalyzedTokenReadings> tokenReadings) {
        List<ChunkTaggedToken> chunkTaggedTokens = this.getBasicChunks(tokenReadings);
        for (RegularExpressionWithPhraseType regex : REGEXES2) {
            this.apply(regex, chunkTaggedTokens);
        }
        this.assignChunksToReadings(chunkTaggedTokens);
    }

    /**
     * Wraps the given tokens into chunk-tagged tokens and applies the REGEXES1 rules.
     * <p>
     * Whitespace tokens and tokens that already carry the {@code MayMissingYO} chunk tag are
     * left out; every remaining token starts with the single tag {@code O}.
     *
     * @param tokenReadings the tokens to process
     * @return the chunk-tagged tokens after all REGEXES1 rules have been applied in order
     */
    List<ChunkTaggedToken> getBasicChunks(List<AnalyzedTokenReadings> tokenReadings) {
        List<ChunkTaggedToken> basicChunks = new ArrayList<ChunkTaggedToken>();
        for (AnalyzedTokenReadings readings : tokenReadings) {
            boolean skipped = readings.isWhitespace() || readings.getChunkTags().contains(new ChunkTag("MayMissingYO"));
            if (!skipped) {
                List<ChunkTag> initialTags = Collections.singletonList(new ChunkTag("O"));
                basicChunks.add(new ChunkTaggedToken(readings.getToken(), initialTags, readings));
            }
        }
        if (debug) {
            System.out.println("=============== CHUNKER INPUT ===============");
            System.out.println(this.getDebugString(basicChunks));
        }
        for (RegularExpressionWithPhraseType rule : REGEXES1) {
            this.apply(rule, basicChunks);
        }
        return basicChunks;
    }

    /**
     * Applies one rule to the token list in place and, if the debug dump of the tokens
     * changed, prints what the rule did.
     *
     * @param regex  the rule to apply
     * @param tokens the tokens to tag; entries are replaced in place
     * @throws RuntimeException wrapping any exception raised while applying the rule
     */
    private void apply(RegularExpressionWithPhraseType regex, List<ChunkTaggedToken> tokens) {
        String before = this.getDebugString(tokens);
        try {
            AffectedSpans affectedSpans = this.doApplyRegex(regex, tokens);
            String after = this.getDebugString(tokens);
            // With debug off both dumps are empty strings, so nothing is ever printed.
            boolean changed = !after.equals(before);
            if (changed) {
                this.printDebugInfo(regex, affectedSpans, after);
            }
        }
        catch (Exception e) {
            throw new RuntimeException("Could not apply chunk regexp '" + regex + "' to tokens: " + tokens, e);
        }
    }

    /**
     * Copies the computed chunk tags from each chunk-tagged token onto its readings.
     * Tokens without readings are skipped.
     *
     * @param chunkTaggedTokens the tokens whose tags are to be written back
     */
    private void assignChunksToReadings(List<ChunkTaggedToken> chunkTaggedTokens) {
        for (ChunkTaggedToken taggedToken : chunkTaggedTokens) {
            AnalyzedTokenReadings target = taggedToken.getReadings();
            if (target != null) {
                target.setChunkTags(taggedToken.getChunkTags());
            }
        }
    }

    /**
     * Finds all matches of the rule in {@code tokens} and re-tags every matched token.
     * <p>
     * For each matched position the token is replaced by a new {@code ChunkTaggedToken}
     * carrying its previous tags (minus the FILTER_TAGS names when the rule is in overwrite
     * mode) plus the tag derived from the rule's phrase type. The placeholder tag {@code O}
     * is removed when a new tag is added.
     *
     * @param regex  the rule to apply
     * @param tokens the tokens to tag; entries in matched ranges are replaced in place
     * @return the half-open index ranges [start, end) of all matches
     */
    private AffectedSpans doApplyRegex(RegularExpressionWithPhraseType regex, List<ChunkTaggedToken> tokens) {
        List<Match<ChunkTaggedToken>> matches = regex.expression.findAll(tokens);
        List<Span> affectedSpans = new ArrayList<Span>();
        for (Match<ChunkTaggedToken> match : matches) {
            affectedSpans.add(new Span(match.startIndex(), match.endIndex()));
            for (int i = match.startIndex(); i < match.endIndex(); ++i) {
                ChunkTaggedToken token = tokens.get(i);
                List<ChunkTag> newChunkTags = new ArrayList<ChunkTag>(token.getChunkTags());
                if (regex.overwrite) {
                    // Overwrite mode: drop existing tags whose name is listed in FILTER_TAGS.
                    List<ChunkTag> filtered = new ArrayList<ChunkTag>();
                    for (ChunkTag existingTag : newChunkTags) {
                        if (!FILTER_TAGS.contains(existingTag.getChunkTag())) {
                            filtered.add(existingTag);
                        }
                    }
                    newChunkTags = filtered;
                }
                ChunkTag newTag = this.getChunkTag(regex, match, i);
                if (!newChunkTags.contains(newTag)) {
                    newChunkTags.add(newTag);
                    // The token now belongs to a chunk, so the "outside" placeholder goes away.
                    newChunkTags.remove(new ChunkTag("O"));
                }
                tokens.set(i, new ChunkTaggedToken(token.getToken(), newChunkTags, token.getReadings()));
            }
        }
        return new AffectedSpans(affectedSpans);
    }

    /**
     * Derives the chunk tag for position {@code i} of a match.
     * <p>
     * NP, NPP, VP, ADJP and DPT yield a {@code B-} tag at the first position of the match and
     * an {@code I-} tag elsewhere (NPP uses the name {@code NP-plural}). Every other phrase
     * type yields a tag equal to the enum constant's name, regardless of position.
     *
     * @param regex the rule that produced the match
     * @param match the match being tagged
     * @param i     index of the token within the token list
     * @return the tag to add to the token at index {@code i}
     */
    private ChunkTag getChunkTag(RegularExpressionWithPhraseType regex, Match<ChunkTaggedToken> match, int i) {
        final String baseName;
        switch (regex.phraseType) {
            case NP:
                baseName = "NP";
                break;
            case NPP:
                baseName = "NP-plural";
                break;
            case VP:
                baseName = "VP";
                break;
            case ADJP:
                baseName = "ADJP";
                break;
            case DPT:
                baseName = "DPT";
                break;
            default:
                // No begin/inside distinction for the remaining phrase types.
                return new ChunkTag(regex.phraseType.name());
        }
        String position = i == match.startIndex() ? "B-" : "I-";
        return new ChunkTag(position + baseName);
    }

    /**
     * Prints the rule that was applied followed by the token dump, marking the lines
     * of tokens that lie inside a matched span with an asterisk.
     *
     * @param regex         the rule that was applied
     * @param affectedSpans the index ranges matched by the rule
     * @param debug         the token dump, one token per line, as built by getDebugString()
     */
    private void printDebugInfo(RegularExpressionWithPhraseType regex, AffectedSpans affectedSpans, String debug) {
        System.out.println("=== Applied " + regex + " ===");
        if (regex.overwrite) {
            System.out.println("Note: overwrite mode, replacing old " + FILTER_TAGS + " tags");
        }
        String[] lines = debug.split("\n");
        for (int lineNo = 0; lineNo < lines.length; lineNo++) {
            String line = lines[lineNo];
            // Line n of the dump corresponds to token n, so the line index doubles as token index.
            String shown = affectedSpans.isAffected(lineNo) ? line.replaceFirst("^  ", " *") : line;
            System.out.println(shown);
        }
        System.out.println();
    }

    /**
     * Builds a one-line-per-token dump of the given tokens for debug output.
     * <p>
     * Each line starts with two spaces (printDebugInfo() relies on that prefix to mark
     * affected lines), followed by the token, {@code " -- "} and its readings with the
     * leading token text removed.
     *
     * @param tokens the tokens to dump
     * @return the dump, or an empty string when debug mode is off
     */
    private String getDebugString(List<ChunkTaggedToken> tokens) {
        if (!debug) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (ChunkTaggedToken token : tokens) {
            AnalyzedTokenReadings readings = token.getReadings();
            // Readings may be absent (assignChunksToReadings() guards against that as well);
            // print "null" instead of failing with a NullPointerException in debug mode.
            String tokenReadingStr = readings == null
                    ? "null"
                    : readings.toString().replaceFirst(Pattern.quote(token.getToken()) + "\\[", "[");
            sb.append("  ").append(token).append(" -- ").append(tokenReadingStr).append('\n');
        }
        return sb.toString();
    }

    // Class initialization: registers the shorthand expansions, then compiles both rule lists.
    // The expansions must be registered first because build() reads SYNTAX_EXPANSION.
    static {
        // Shorthand for "a B-<type> token followed by any number of I-<type> tokens".
        SYNTAX_EXPANSION.put("<NP>", "<chunk=B-NP> <chunk=I-NP>*");
        SYNTAX_EXPANSION.put("<VP>", "<chunk=B-VP> <chunk=I-VP>*");
        SYNTAX_EXPANSION.put("<ADJP>", "<chunk=B-ADJP> <chunk=I-ADJP>*");
        SYNTAX_EXPANSION.put("<DPT>", "<chunk=B-DPT> <chunk=I-DPT>*");
        debug = false;
        // First pass: rules applied in this order by getBasicChunks().
        REGEXES1 = Arrays.asList(RussianChunker.build("<posre='NN:(Name|Fam|Patr):.*'> <posre='NN:(Name|Fam|Patr):.*'>+ ", PhraseType.NP, true), RussianChunker.build("<posre='NN:Fam:.*'> <regexCS=[\u0410-\u042f\u0401]> <.> <regexCS=[\u0410-\u042f\u0401]> <.> ", PhraseType.NP, true), RussianChunker.build("<regexCS=[\u0410-\u042f\u0401]> <.> <regexCS=[\u0410-\u042f\u0401]> <.> <posre='NN:Fam:.*'> ", PhraseType.NP, true), RussianChunker.build("<posre='VB:.*:.*' & !posre='NN:.*'>* ", PhraseType.VP, false), RussianChunker.build("<\u0435\u0441\u043b\u0438>", PhraseType.SBAR), RussianChunker.build("<\u043f\u043e\u044d\u0442\u043e\u043c\u0443>", PhraseType.SBAR), RussianChunker.build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.NP, true), RussianChunker.build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> <posre='NN:(Anim|Inanim):.*'> ", PhraseType.NP, true), RussianChunker.build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(Nom|V)'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true), RussianChunker.build("<posre='DPT:.*:.*' & !pos='PREP'> ", PhraseType.DPT), RussianChunker.build("<posre='DPT:.*:.*' & !pos='PREP'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.DPT, true), RussianChunker.build("<posre='DPT:.*:.*' & !pos='PREP'> <posre='PREP'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.DPT, true), RussianChunker.build("<posre='PT:.*:.*'> ", PhraseType.ADJP), RussianChunker.build("<posre='PT:.*:.*'> <pos='ADV' > ", PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='PREP'> <posre='NN:.*:.*:(R|D|T|P|V)' > ", PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='PREP'> <posre='ADJ:.*:.*:(R|D|T|P|V)' > <posre='NN:.*:.*:(R|D|T|P|V)' > ", 
PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(Nom|V)'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='PNN:.*' & !posre='PNN:.*:Nom:.*'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true), RussianChunker.build("<posre='PT:.*:.*'> <posre='ADJ:.*:.*' > ", PhraseType.ADJP, false), RussianChunker.build("<\u0442\u043e\u0432>", PhraseType.NP));
        // Second pass: rules applied in this order by addChunkTags() on top of the first-pass result.
        REGEXES2 = Arrays.asList(RussianChunker.build("<posre=NN:Name:.*> <\u0438> <posre=NN:Name:.*>", PhraseType.NPP, true), RussianChunker.build("<posre=NN:Name:.*> <\u0438\u043b\u0438> <posre=NN:Name:.*>", PhraseType.NPP, true), RussianChunker.build("<\u043d\u0435> <posre='VB:.*:.*' & !posre='NN:.*'>* ", PhraseType.VP, false));
    }

    /**
     * A compiled chunk expression together with the phrase type it assigns and a flag
     * telling whether existing FILTER_TAGS tags are dropped when the rule matches.
     */
    private static class RegularExpressionWithPhraseType {
        /** The compiled expression matched against the chunk-tagged tokens. */
        final RegularExpression<ChunkTaggedToken> expression;
        /** The phrase type from which the new chunk tag is derived. */
        final PhraseType phraseType;
        /** Whether the rule runs in overwrite mode. */
        final boolean overwrite;

        RegularExpressionWithPhraseType(RegularExpression<ChunkTaggedToken> expression, PhraseType phraseType, boolean overwrite) {
            this.expression = expression;
            this.phraseType = phraseType;
            this.overwrite = overwrite;
        }

        /**
         * @return {@code "<phraseType> <= <expression> (overwrite: <flag>)"}, used in debug
         *         output and in the exception message of apply()
         */
        @Override
        public String toString() {
            return this.phraseType + " <= " + this.expression + " (overwrite: " + this.overwrite + ")";
        }
    }

    /**
     * The set of index ranges touched by one rule application; used only to decide
     * which lines of the debug dump get highlighted.
     */
    private static class AffectedSpans {
        final List<Span> spans;

        AffectedSpans(List<Span> spans) {
            this.spans = spans;
        }

        /**
         * @param pos a token index
         * @return {@code true} if {@code pos} lies in at least one span, where a span
         *         covers {@code startIndex} (inclusive) to {@code endIndex} (exclusive)
         */
        boolean isAffected(int pos) {
            for (Span candidate : this.spans) {
                boolean inside = candidate.startIndex <= pos && pos < candidate.endIndex;
                if (inside) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * An index range within the token list, as reported by a match's
     * {@code startIndex()} and {@code endIndex()}.
     */
    private static class Span {
        /** First index of the range (treated as inclusive by AffectedSpans.isAffected()). */
        final int startIndex;
        /** End of the range (treated as exclusive by AffectedSpans.isAffected()). */
        final int endIndex;

        Span(int start, int end) {
            this.startIndex = start;
            this.endIndex = end;
        }
    }

    /**
     * Phrase types a chunk rule can assign. getChunkTag() turns NP, NPP, VP, ADJP and DPT
     * into begin/inside tag pairs; all other constants become a tag equal to their name.
     * Several names also appear in FILTER_TAGS.
     */
    enum PhraseType {
        /** Tagged as {@code B-NP} / {@code I-NP}. */
        NP,
        /** Tagged with its own name. */
        NPS,
        /** Tagged as {@code B-NP-plural} / {@code I-NP-plural}. */
        NPP,
        /** Tagged with its own name. */
        PP,
        /** Tagged with its own name; tokens already carrying this tag are skipped by getBasicChunks(). */
        MayMissingYO,
        /** Tagged as {@code B-VP} / {@code I-VP}. */
        VP,
        /** Tagged with its own name. */
        SBAR,
        /** Tagged as {@code B-ADJP} / {@code I-ADJP}. */
        ADJP,
        /** Tagged as {@code B-DPT} / {@code I-DPT}. */
        DPT
    }
}

