/*
 * Decompiled with CFR 0.152.
 */
package com.worksap.nlp.sudachi;

import com.worksap.nlp.sudachi.InputTextPlugin;
import com.worksap.nlp.sudachi.LatticeImpl;
import com.worksap.nlp.sudachi.LatticeNode;
import com.worksap.nlp.sudachi.LatticeNodeImpl;
import com.worksap.nlp.sudachi.Morpheme;
import com.worksap.nlp.sudachi.MorphemeList;
import com.worksap.nlp.sudachi.OovProviderPlugin;
import com.worksap.nlp.sudachi.PathRewritePlugin;
import com.worksap.nlp.sudachi.Tokenizer;
import com.worksap.nlp.sudachi.UTF8InputText;
import com.worksap.nlp.sudachi.UTF8InputTextBuilder;
import com.worksap.nlp.sudachi.dictionary.CategoryType;
import com.worksap.nlp.sudachi.dictionary.Grammar;
import com.worksap.nlp.sudachi.dictionary.Lexicon;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/**
 * {@link Tokenizer} implementation that rewrites the input text with the
 * configured plugins, builds a lattice of candidate nodes over the UTF-8
 * bytes, takes the lattice's best path, lets the path-rewrite plugins edit
 * it, and finally splits the nodes according to the requested split mode.
 *
 * <p>A single {@link LatticeImpl} instance is held in a field and reused by
 * every call to {@link #tokenize}; no synchronization is performed here, so
 * concurrent calls on one instance would operate on the same lattice.
 */
class JapaneseTokenizer implements Tokenizer {
    Grammar grammar;
    Lexicon lexicon;
    List<InputTextPlugin> inputTextPlugins;
    List<OovProviderPlugin> oovProviderPlugins;
    List<PathRewritePlugin> pathRewritePlugins;
    /** Last entry of {@link #oovProviderPlugins}, or {@code null} when that list is empty. */
    OovProviderPlugin defaultOovProvider;
    /** Destination for debug dumps; {@code null} disables dumping. */
    PrintStream dumpOutput;
    /** Working lattice, created once and reused across {@link #tokenize} calls. */
    LatticeImpl lattice;

    /**
     * Creates a tokenizer over the given dictionary parts and plugins.
     *
     * @param grammar            grammar passed to the lattice, the input builder and the result list
     * @param lexicon            lexicon used for lookups and for creating lattice nodes
     * @param inputTextPlugins   plugins applied, in order, to the input text builder
     * @param oovProviderPlugins plugins asked for out-of-vocabulary nodes; the last one
     *                           also serves as the fallback provider
     * @param pathRewritePlugins plugins applied, in order, to the best path
     */
    JapaneseTokenizer(Grammar grammar, Lexicon lexicon, List<InputTextPlugin> inputTextPlugins, List<OovProviderPlugin> oovProviderPlugins, List<PathRewritePlugin> pathRewritePlugins) {
        this.grammar = grammar;
        this.lexicon = lexicon;
        this.inputTextPlugins = inputTextPlugins;
        this.oovProviderPlugins = oovProviderPlugins;
        this.pathRewritePlugins = pathRewritePlugins;
        this.lattice = new LatticeImpl(grammar);
        if (!oovProviderPlugins.isEmpty()) {
            this.defaultOovProvider = oovProviderPlugins.get(oovProviderPlugins.size() - 1);
        }
    }

    /**
     * Tokenizes {@code text} and returns its morphemes.
     *
     * @param mode split mode; for any mode other than {@code C} the best path is
     *             further split via {@link #splitPath}
     * @param text text to analyze; an empty string yields an empty list
     * @return the morphemes of {@code text}
     * @throws IllegalStateException if no node can be placed at some position
     *                               while building the lattice
     */
    @Override
    public List<Morpheme> tokenize(Tokenizer.SplitMode mode, String text) {
        if (text.isEmpty()) {
            return Collections.emptyList();
        }

        UTF8InputTextBuilder builder = new UTF8InputTextBuilder(text, this.grammar);
        for (InputTextPlugin plugin : this.inputTextPlugins) {
            plugin.rewrite(builder);
        }
        UTF8InputText input = builder.build();
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== Input dump:");
            this.dumpOutput.println(input.getText());
        }

        List<LatticeNode> path;
        try {
            this.buildLattice(input);
            if (this.dumpOutput != null) {
                this.dumpOutput.println("=== Lattice dump:");
                this.lattice.dump(this.dumpOutput);
            }

            path = this.lattice.getBestPath();
            if (this.dumpOutput != null) {
                this.dumpOutput.println("=== Before rewriting:");
                this.dumpPath(path);
            }

            for (PathRewritePlugin plugin : this.pathRewritePlugins) {
                plugin.rewrite(input, path, this.lattice);
            }
        } finally {
            // The lattice is shared between calls, so it must be cleared even when
            // lattice construction or a plugin throws; otherwise a failed call
            // would leave its partially built state behind for the next one.
            this.lattice.clear();
        }

        if (mode != Tokenizer.SplitMode.C) {
            path = this.splitPath(path, mode);
        }
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== After rewriting:");
            this.dumpPath(path);
            this.dumpOutput.println("===");
        }
        return new MorphemeList(input, this.grammar, this.lexicon, path);
    }

    /**
     * Sets the stream that receives debug dumps during {@link #tokenize}.
     *
     * @param output the dump destination, or {@code null} to disable dumping
     */
    @Override
    public void setDumpOutput(PrintStream output) {
        this.dumpOutput = output;
    }

    /**
     * Fills the shared lattice with lexicon and out-of-vocabulary nodes for
     * {@code input} and connects the end-of-sentence node.
     *
     * @param input the rewritten input text
     * @return the shared lattice instance
     * @throws IllegalStateException if neither the lexicon nor any OOV provider
     *                               yields a node at a visited position
     */
    LatticeImpl buildLattice(UTF8InputText input) {
        byte[] bytes = input.getByteText();
        this.lattice.resize(bytes.length);
        for (int i = 0; i < bytes.length; ++i) {
            // Only visit offsets the input allows (canBow; presumably "can be
            // beginning of word" - confirm) and that the lattice reports as
            // having a previous node.
            if (!input.canBow(i) || !this.lattice.hasPreviousNode(i)) {
                continue;
            }

            boolean hasWords = false;
            Iterator<int[]> iterator = this.lexicon.lookup(bytes, i);
            while (iterator.hasNext()) {
                int[] r = iterator.next();
                int wordId = r[0];
                int end = r[1];
                // Drop lexicon hits ending inside the text at an offset where canBow is false.
                if (end < bytes.length && !input.canBow(end)) {
                    continue;
                }
                LatticeNodeImpl n = new LatticeNodeImpl(this.lexicon, this.lexicon.getLeftId(wordId), this.lexicon.getRightId(wordId), this.lexicon.getCost(wordId), wordId);
                this.lattice.insert(i, end, n);
                hasWords = true;
            }

            // Character categories containing NOOOVBOW skip the regular OOV plugins.
            if (!input.getCharCategoryTypes(i).contains(CategoryType.NOOOVBOW)) {
                for (OovProviderPlugin plugin : this.oovProviderPlugins) {
                    hasWords = this.insertOovNodes(plugin, input, i, hasWords);
                }
            }
            // Fall back to the default provider only when nothing has been inserted at all.
            if (!hasWords && this.defaultOovProvider != null) {
                hasWords = this.insertOovNodes(this.defaultOovProvider, input, i, hasWords);
            }
            if (!hasWords) {
                throw new IllegalStateException("there is no morpheme at " + i);
            }
        }
        this.lattice.connectEosNode();
        return this.lattice;
    }

    /**
     * Inserts every node {@code provider} returns for {@code offset} into the lattice.
     *
     * @return {@code true} if {@code hasWords} was already true or at least one node was inserted
     */
    private boolean insertOovNodes(OovProviderPlugin provider, UTF8InputText input, int offset, boolean hasWords) {
        boolean found = hasWords;
        for (LatticeNode node : provider.getOOV(input, offset, hasWords)) {
            found = true;
            this.lattice.insert(node.getBegin(), node.getEnd(), node);
        }
        return found;
    }

    /**
     * Replaces each node of {@code path} by the units listed in its word info.
     *
     * <p>For mode {@code A} the A-unit split is used, for every other mode the
     * B-unit split. A node whose split holds zero or one word id is kept as is.
     * Otherwise one new node is created per word id (left id, right id and cost
     * all 0) and the new nodes are laid end to end starting at the original
     * node's begin, each advancing by the length reported by its word info.
     *
     * @param path the path to split
     * @param mode the split mode
     * @return a new list holding the split path
     */
    List<LatticeNode> splitPath(List<LatticeNode> path, Tokenizer.SplitMode mode) {
        List<LatticeNode> newPath = new ArrayList<LatticeNode>();
        for (LatticeNode node : path) {
            int[] wids = mode == Tokenizer.SplitMode.A ? node.getWordInfo().getAunitSplit() : node.getWordInfo().getBunitSplit();
            if (wids.length <= 1) {
                newPath.add(node);
                continue;
            }
            int offset = node.getBegin();
            for (int wid : wids) {
                LatticeNodeImpl n = new LatticeNodeImpl(this.lexicon, 0, 0, 0, wid);
                n.begin = offset;
                offset += n.getWordInfo().getLength();
                n.end = offset;
                newPath.add(n);
            }
        }
        return newPath;
    }

    /**
     * Prints each node of {@code path} to the dump output as {@code "<index>: <node>"}.
     * Callers must ensure {@link #dumpOutput} is non-null.
     *
     * @param path the path to print
     */
    void dumpPath(List<LatticeNode> path) {
        int i = 0;
        for (LatticeNode node : path) {
            this.dumpOutput.println(String.format("%d: %s", i, node.toString()));
            ++i;
        }
    }
}

