/*
 * Decompiled with CFR 0.152.
 */
package com.worksap.nlp.sudachi;

import com.worksap.nlp.sudachi.IOTools;
import com.worksap.nlp.sudachi.InputTextPlugin;
import com.worksap.nlp.sudachi.LatticeImpl;
import com.worksap.nlp.sudachi.LatticeNode;
import com.worksap.nlp.sudachi.LatticeNodeImpl;
import com.worksap.nlp.sudachi.MorphemeList;
import com.worksap.nlp.sudachi.OovProviderPlugin;
import com.worksap.nlp.sudachi.PathRewritePlugin;
import com.worksap.nlp.sudachi.SentenceSplittingAnalysis;
import com.worksap.nlp.sudachi.Tokenizer;
import com.worksap.nlp.sudachi.UTF8InputText;
import com.worksap.nlp.sudachi.UTF8InputTextBuilder;
import com.worksap.nlp.sudachi.WordMask;
import com.worksap.nlp.sudachi.dictionary.CategoryType;
import com.worksap.nlp.sudachi.dictionary.Grammar;
import com.worksap.nlp.sudachi.dictionary.Lexicon;
import com.worksap.nlp.sudachi.dictionary.LexiconSet;
import com.worksap.nlp.sudachi.dictionary.WordLookup;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.json.JsonWriter;

/**
 * Package-private {@link Tokenizer} implementation that builds a lattice of
 * dictionary and out-of-vocabulary candidates over the input bytes, selects
 * the best path through it and wraps that path in a {@link MorphemeList}.
 *
 * <p>The same {@link LatticeImpl} instance is reused by every call, and the
 * optional dump targets ({@link #dumpOutput}, {@link #jsonBuilder}) are
 * instance fields. NOTE(review): this suggests an instance should not be used
 * from several threads at once - confirm against the callers.
 */
class JapaneseTokenizer implements Tokenizer {
    Grammar grammar;
    LexiconSet lexicon;
    List<InputTextPlugin> inputTextPlugins;
    List<OovProviderPlugin> oovProviderPlugins;
    List<PathRewritePlugin> pathRewritePlugins;
    /** Last entry of {@link #oovProviderPlugins}, or {@code null} when that list is empty. */
    OovProviderPlugin defaultOovProvider;
    /** When non-null, human-readable dumps of each analysis step are printed here. */
    PrintStream dumpOutput;
    /** Non-null only while {@link #dumpInternalStructures(String)} is running. */
    JsonObjectBuilder jsonBuilder;
    /** Passed through to every created {@link MorphemeList}; {@code true} unless disabled. */
    boolean allowEmptyMorpheme;
    /** Working lattice, reused (resized and cleared) for every analyzed sentence. */
    LatticeImpl lattice;

    /**
     * Creates a tokenizer over the given dictionary and plugin chain.
     *
     * @param grammar            grammar used for the lattice and the produced morphemes
     * @param lexicon            lexicon to look words up in; must be a {@link LexiconSet},
     *                           otherwise a {@link ClassCastException} is thrown
     * @param inputTextPlugins   plugins applied, in order, to the input text builder
     * @param oovProviderPlugins plugins asked for out-of-vocabulary nodes; the last
     *                           one doubles as the fallback provider
     * @param pathRewritePlugins plugins applied, in order, to the best path
     */
    JapaneseTokenizer(Grammar grammar, Lexicon lexicon, List<InputTextPlugin> inputTextPlugins, List<OovProviderPlugin> oovProviderPlugins, List<PathRewritePlugin> pathRewritePlugins) {
        this.grammar = grammar;
        this.lexicon = (LexiconSet) lexicon;
        this.inputTextPlugins = inputTextPlugins;
        this.oovProviderPlugins = oovProviderPlugins;
        this.pathRewritePlugins = pathRewritePlugins;
        this.lattice = new LatticeImpl(grammar);
        this.allowEmptyMorpheme = true;
        if (!oovProviderPlugins.isEmpty()) {
            this.defaultOovProvider = oovProviderPlugins.get(oovProviderPlugins.size() - 1);
        }
    }

    /**
     * Tokenizes the whole text as a single sentence.
     *
     * @param mode split mode for the resulting morphemes
     * @param text text to analyze
     * @return {@link MorphemeList#EMPTY} for empty text, otherwise the analysis result
     */
    @Override
    public MorphemeList tokenize(Tokenizer.SplitMode mode, String text) {
        if (text.isEmpty()) {
            return MorphemeList.EMPTY;
        }
        UTF8InputText input = this.buildInputText(text);
        return this.tokenizeSentence(mode, input);
    }

    /**
     * Splits the text into sentences and tokenizes each of them.
     *
     * @param mode split mode for the resulting morphemes
     * @param text text to analyze
     * @return one {@link MorphemeList} per sentence; an empty collection for empty text
     */
    @Override
    public Iterable<MorphemeList> tokenizeSentences(Tokenizer.SplitMode mode, String text) {
        if (text.isEmpty()) {
            return Collections.emptyList();
        }
        SentenceSplittingAnalysis analysis = new SentenceSplittingAnalysis(mode, this);
        int length = analysis.tokenizeBuffer(text);
        List<MorphemeList> result = analysis.result;
        int bos = analysis.bos;
        // A negative length is treated as "the analysis left a tail behind":
        // whatever lies between bos and the end of the text becomes one more sentence.
        if (length < 0) {
            int eos = analysis.input.getText().length();
            if (bos != eos) {
                UTF8InputText slice = analysis.input;
                if (bos != 0) {
                    slice = slice.slice(bos, eos);
                }
                result.add(this.tokenizeSentence(mode, slice));
            }
        }
        return result;
    }

    /**
     * Reads the text from {@code reader} in chunks of up to 4096 chars, splits
     * it into sentences and tokenizes each of them.
     *
     * @param mode   split mode for the resulting morphemes
     * @param reader source of the text
     * @return one {@link MorphemeList} per sentence
     * @throws IOException if reading from {@code reader} fails
     */
    @Override
    public Iterable<MorphemeList> tokenizeSentences(Tokenizer.SplitMode mode, Reader reader) throws IOException {
        CharBuffer buffer = CharBuffer.allocate(4096);
        SentenceSplittingAnalysis analysis = new SentenceSplittingAnalysis(mode, this);
        while (IOTools.readAsMuchAsCan(reader, buffer) > 0) {
            buffer.flip();
            int length = analysis.tokenizeBuffer(buffer);
            if (length < 0) {
                // Keep the unprocessed tail (from the reported sentence start) and
                // move it to the front so the next read appends after it.
                buffer.position(analysis.bosPosition());
                buffer.compact();
            }
            // NOTE(review): for a non-negative length the buffer is left as
            // tokenizeBuffer returned it - confirm that this is intended.
        }
        buffer.flip();
        List<MorphemeList> sentences = analysis.result;
        if (buffer.hasRemaining()) {
            // Whatever is still buffered at end of input forms the final sentence.
            sentences.add(this.tokenizeSentence(mode, this.buildInputText(buffer)));
        }
        return sentences;
    }

    /**
     * Sets the stream that receives textual dumps of the analysis steps.
     *
     * @param output target stream, or {@code null} to turn dumping off
     */
    @Override
    public void setDumpOutput(PrintStream output) {
        this.dumpOutput = output;
    }

    /**
     * Tokenizes {@code text} in split mode C while recording the input text,
     * the lattice and the paths as JSON.
     *
     * <p>The JSON builder is installed only for the duration of this call and
     * is removed again afterwards, even when tokenization fails, so that later
     * tokenize calls do not keep recording into a stale builder. For empty text
     * nothing is recorded and an empty JSON object is written.
     *
     * @param text text to analyze
     * @return the recorded structures serialized as a JSON object
     */
    @Override
    public String dumpInternalStructures(String text) {
        this.jsonBuilder = Json.createObjectBuilder();
        try {
            this.tokenize(Tokenizer.SplitMode.C, text);
            StringWriter stringWriter = new StringWriter();
            try (JsonWriter writer = Json.createWriter(stringWriter)) {
                writer.writeObject(this.jsonBuilder.build());
            }
            return stringWriter.toString();
        } finally {
            this.jsonBuilder = null;
        }
    }

    /**
     * Builds the analyzable input from raw text, letting every input text
     * plugin rewrite it first, and reports it to the active dump targets.
     *
     * @param text raw text
     * @return the built input text
     */
    UTF8InputText buildInputText(CharSequence text) {
        UTF8InputTextBuilder builder = new UTF8InputTextBuilder(text, this.grammar);
        for (InputTextPlugin plugin : this.inputTextPlugins) {
            plugin.rewrite(builder);
        }
        UTF8InputText input = builder.build();
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== Input dump:");
            this.dumpOutput.println(input.getText());
        }
        if (this.jsonBuilder != null) {
            this.jsonBuilder.add("inputText", Json.createObjectBuilder().add("originalText", input.getOriginalText()).add("modifiedText", input.getText()));
        }
        return input;
    }

    /**
     * Analyzes one sentence: builds the lattice, takes its best path, applies
     * the path rewrite plugins, splits the path for modes other than C and
     * wraps the result. Each step is reported to the active dump targets.
     *
     * @param mode  split mode for the resulting morphemes
     * @param input prepared input text of the sentence
     * @return the morphemes of the sentence
     */
    MorphemeList tokenizeSentence(Tokenizer.SplitMode mode, UTF8InputText input) {
        this.buildLattice(input);
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== Lattice dump:");
            this.lattice.dump(this.dumpOutput);
        }
        if (this.jsonBuilder != null) {
            this.jsonBuilder.add("lattice", this.lattice.toJson());
        }
        List<LatticeNode> path = this.lattice.getBestPath();
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== Before rewriting:");
            this.dumpPath(path);
        }
        if (this.jsonBuilder != null) {
            this.jsonBuilder.add("bestPath", this.pathToJson(path, this.lattice));
        }
        for (PathRewritePlugin plugin : this.pathRewritePlugins) {
            plugin.rewrite(input, path, this.lattice);
        }
        // The lattice is shared between calls, so it is emptied as soon as the
        // rewrite plugins no longer need it.
        this.lattice.clear();
        if (mode != Tokenizer.SplitMode.C) {
            path = this.splitPath(path, mode);
        }
        if (this.dumpOutput != null) {
            this.dumpOutput.println("=== After rewriting:");
            this.dumpPath(path);
            this.dumpOutput.println("===");
        }
        if (this.jsonBuilder != null) {
            // NOTE(review): the lattice has already been cleared here; confirm that
            // nodeToJson does not rely on state removed by clear().
            this.jsonBuilder.add("rewrittenPath", this.pathToJson(path, this.lattice));
        }
        return new MorphemeList(input, this.grammar, this.lexicon, path, this.allowEmptyMorpheme, mode);
    }

    /**
     * Fills the shared lattice with candidate nodes for {@code input} and
     * connects the end-of-sentence node.
     *
     * <p>For every byte offset that may begin a word and is reachable from a
     * previous node, all lexicon matches whose end is also a valid boundary are
     * inserted, then the OOV providers are consulted.
     *
     * @param input prepared input text
     * @return the shared lattice instance
     * @throws IllegalStateException if no candidate at all could be produced
     *                               at some processed offset
     */
    LatticeImpl buildLattice(UTF8InputText input) {
        byte[] bytes = input.getByteText();
        this.lattice.resize(bytes.length);
        // Collects every node created below; OOV plugins receive this list and
        // append their nodes to it.
        ArrayList<LatticeNodeImpl> unkNodes = new ArrayList<LatticeNodeImpl>(64);
        WordLookup wordLookup = this.lexicon.makeLookup();
        for (int byteBoundary = 0; byteBoundary < bytes.length; ++byteBoundary) {
            if (!input.canBow(byteBoundary) || !this.lattice.hasPreviousNode(byteBoundary)) {
                continue;
            }
            wordLookup.reset(bytes, byteBoundary, bytes.length);
            // Accumulates the byte lengths of the candidates found at this offset.
            long wordMask = 0L;
            while (wordLookup.next()) {
                int end = wordLookup.getEndOffset();
                // A match must end at the end of input or where a word may begin.
                if (end < bytes.length && !input.canBow(end)) {
                    continue;
                }
                int numWords = wordLookup.getNumWords();
                int[] wordIds = wordLookup.getWordsIds();
                for (int word = 0; word < numWords; ++word) {
                    int wordId = wordIds[word];
                    LatticeNodeImpl n = new LatticeNodeImpl(this.lexicon, this.lexicon.getLeftId(wordId), this.lexicon.getRightId(wordId), this.lexicon.getCost(wordId), wordId);
                    this.lattice.insert(byteBoundary, end, n);
                    unkNodes.add(n);
                    wordMask = WordMask.addNth(wordMask, end - byteBoundary);
                }
            }
            long wordMaskWithOov = wordMask;
            // Regular OOV providers are skipped for characters of category NOOOVBOW.
            if (!input.getCharCategoryTypes(byteBoundary).contains(CategoryType.NOOOVBOW)) {
                for (OovProviderPlugin plugin : this.oovProviderPlugins) {
                    wordMaskWithOov = this.provideOovs(plugin, input, unkNodes, byteBoundary, wordMaskWithOov);
                }
            }
            // Nothing found so far: fall back to the default provider, if there is one.
            if (wordMaskWithOov == 0L && this.defaultOovProvider != null) {
                wordMaskWithOov = this.provideOovs(this.defaultOovProvider, input, unkNodes, byteBoundary, wordMaskWithOov);
            }
            if (wordMaskWithOov == 0L) {
                throw new IllegalStateException("failed to found any morpheme candidate at boundary " + byteBoundary);
            }
        }
        this.lattice.connectEosNode();
        return this.lattice;
    }

    /**
     * Asks one OOV provider for nodes at {@code boundary} and inserts the
     * nodes it appended to {@code unkNodes} into the lattice.
     *
     * @param plugin   provider to query
     * @param input    prepared input text
     * @param unkNodes list the provider appends its nodes to
     * @param boundary byte offset the nodes should start at
     * @param wordMask mask of the candidate lengths known so far
     * @return {@code wordMask} extended with the lengths of the inserted nodes
     */
    private long provideOovs(OovProviderPlugin plugin, UTF8InputText input, ArrayList<LatticeNodeImpl> unkNodes, int boundary, long wordMask) {
        int initialSize = unkNodes.size();
        int created = plugin.getOOV(input, boundary, wordMask, unkNodes);
        if (created == 0) {
            return wordMask;
        }
        // Only the entries appended by this call are new.
        for (int i = initialSize; i < initialSize + created; ++i) {
            LatticeNodeImpl node = unkNodes.get(i);
            this.lattice.insert(node.getBegin(), node.getEnd(), node);
            wordMask = WordMask.addNth(wordMask, node.getEnd() - node.getBegin());
        }
        return wordMask;
    }

    /**
     * Replaces every node of the path with its splits for the given mode.
     *
     * @param path path whose elements are all {@link LatticeNodeImpl}
     * @param mode split mode to apply
     * @return a new list holding the split nodes in path order
     */
    private List<LatticeNode> splitPath(List<LatticeNode> path, Tokenizer.SplitMode mode) {
        ArrayList<LatticeNode> newPath = new ArrayList<LatticeNode>();
        for (LatticeNode node : path) {
            LatticeNodeImpl nodeImpl = (LatticeNodeImpl) node;
            nodeImpl.appendSplitsTo(newPath, mode);
        }
        return newPath;
    }

    /**
     * Prints the path to {@link #dumpOutput}, one {@code index: node} line per
     * node. The caller must make sure {@link #dumpOutput} is not {@code null}.
     *
     * @param path path to print
     */
    void dumpPath(List<LatticeNode> path) {
        int i = 0;
        for (LatticeNode node : path) {
            this.dumpOutput.printf("%d: %s\n", i, node.toString());
            ++i;
        }
    }

    /**
     * Converts a path into a JSON array with one element per node.
     *
     * @param path    path whose elements are all {@link LatticeNodeImpl}
     * @param lattice lattice used to render each node
     * @return builder of the JSON array
     */
    JsonArrayBuilder pathToJson(List<LatticeNode> path, LatticeImpl lattice) {
        JsonArrayBuilder builder = Json.createArrayBuilder();
        for (LatticeNode node : path) {
            builder.add(lattice.nodeToJson((LatticeNodeImpl) node));
        }
        return builder;
    }

    /** Makes all subsequently created {@link MorphemeList}s disallow empty morphemes. */
    void disableEmptyMorpheme() {
        this.allowEmptyMorpheme = false;
    }
}

