/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.chunk;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkTagHandlerAdapter2;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.HmmChunker;
import com.aliasi.corpus.ObjectHandler;
import com.aliasi.hmm.AbstractHmmEstimator;
import com.aliasi.hmm.HiddenMarkovModel;
import com.aliasi.hmm.HmmDecoder;
import com.aliasi.symbol.SymbolTable;
import com.aliasi.tag.Tagging;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A trainable chunker that feeds an {@link AbstractHmmEstimator} and decodes
 * with an {@link HmmDecoder} wrapped around that same estimator.
 *
 * <p>Training data arrives either as {@link Chunking} instances through
 * {@link #handle(Chunking)} or as dictionary entries through
 * {@link #trainDictionary(CharSequence,String)}.  Compiling the chunker with
 * {@link #compileTo(ObjectOutput)} writes the tokenizer factory and the
 * estimator; reading the result back produces a plain {@link HmmChunker}.
 *
 * <p>For a chunk type {@code X} the HMM states used by this class are
 * {@code B_X} (first token of a multi-token chunk), {@code M_X} (interior
 * token), {@code E_X} (last token) and {@code W_X} (single-token chunk), plus
 * the non-chunk states {@code MM_O}, {@code BB_O_X}, {@code EE_O_X} and
 * {@code WW_O_X}, where the pseudo-type {@code BOS} is used for the
 * boundary variants.
 * NOTE(review): the precise meaning of the non-chunk states is defined by the
 * tag normalization in {@code HmmChunker}, which is not visible in this file;
 * judging from the transitions trained in
 * {@link #smoothBaseTag(String,SymbolTable,AbstractHmmEstimator)} they
 * presumably encode the neighboring chunk type -- confirm against HmmChunker.
 */
public class CharLmHmmChunker extends HmmChunker implements ObjectHandler<Chunking>, Compilable {

    /** Prefix of the state for the first token of a multi-token chunk. */
    private static final String BEGIN_PREFIX = "B_";
    /** Prefix of the state for an interior token of a chunk. */
    private static final String MIDDLE_PREFIX = "M_";
    /** Prefix of the state for the last token of a multi-token chunk. */
    private static final String END_PREFIX = "E_";
    /** Prefix of the state for a single-token chunk. */
    private static final String WHOLE_PREFIX = "W_";
    /** Prefix of the {@code BB_O_*} non-chunk states. */
    private static final String OUT_BEGIN_PREFIX = "BB_O_";
    /** Prefix of the {@code EE_O_*} non-chunk states. */
    private static final String OUT_END_PREFIX = "EE_O_";
    /** Prefix of the {@code WW_O_*} non-chunk states. */
    private static final String OUT_WHOLE_PREFIX = "WW_O_";
    /** The single untyped interior non-chunk state. */
    private static final String OUT_MIDDLE_TAG = "MM_O";
    /** Base tag that never receives chunk states of its own. */
    private static final String OUT_TYPE = "O";
    /** Pseudo-type used for the boundary variants of the non-chunk states. */
    private static final String BOUNDARY_TYPE = "BOS";
    /** {@code "BB_O_BOS"}. */
    private static final String OUT_BEGIN_BOUNDARY_TAG = OUT_BEGIN_PREFIX + BOUNDARY_TYPE;
    /** {@code "EE_O_BOS"}. */
    private static final String OUT_END_BOUNDARY_TAG = OUT_END_PREFIX + BOUNDARY_TYPE;
    /** {@code "WW_O_BOS"}. */
    private static final String OUT_WHOLE_BOUNDARY_TAG = OUT_WHOLE_PREFIX + BOUNDARY_TYPE;

    /** Typed empty array used when converting a chunk set to an array. */
    static final Chunk[] EMPTY_CHUNK_ARRAY = new Chunk[0];

    private final AbstractHmmEstimator mHmmEstimator;
    private final TokenizerFactory mTokenizerFactory;
    /** Base tags that have already been through {@link #smoothBaseTag}. */
    private final Set<String> mTagSet = new HashSet<String>();
    private final boolean mSmoothTags;

    /**
     * Constructs a chunker with tag smoothing of training chunkings disabled.
     *
     * @param tokenizerFactory factory used to tokenize training and input text
     * @param hmmEstimator estimator that receives all training events
     */
    public CharLmHmmChunker(TokenizerFactory tokenizerFactory, AbstractHmmEstimator hmmEstimator) {
        this(tokenizerFactory, hmmEstimator, false);
    }

    /**
     * Constructs a chunker and immediately trains the boundary states via
     * {@link #smoothBoundaries()}.
     *
     * @param tokenizerFactory factory used to tokenize training and input text
     * @param hmmEstimator estimator that receives all training events
     * @param smoothTags if {@code true}, every tag seen in a training chunking
     *     is additionally passed through {@link #smoothTags(String[])}
     */
    public CharLmHmmChunker(TokenizerFactory tokenizerFactory, AbstractHmmEstimator hmmEstimator, boolean smoothTags) {
        super(tokenizerFactory, new HmmDecoder(hmmEstimator));
        mHmmEstimator = hmmEstimator;
        mTokenizerFactory = tokenizerFactory;
        mSmoothTags = smoothTags;
        smoothBoundaries();
    }

    /**
     * Returns the estimator supplied at construction time.
     *
     * @return the underlying HMM estimator
     */
    public AbstractHmmEstimator getHmmEstimator() {
        return mHmmEstimator;
    }

    /**
     * Returns the tokenizer factory supplied at construction time.
     *
     * @return the tokenizer factory
     */
    @Override
    public TokenizerFactory getTokenizerFactory() {
        return mTokenizerFactory;
    }

    /**
     * Trains the estimator on a single dictionary entry of the given type.
     *
     * <p>The entry is tokenized; a single token is trained as an emission of
     * {@code W_type}, otherwise the tokens are trained as the state sequence
     * {@code B_type, M_type..., E_type} together with the transitions between
     * consecutive states.  The type is passed through
     * {@link #smoothBaseTag(String,SymbolTable,AbstractHmmEstimator)} first,
     * regardless of the tag-smoothing flag given to the constructor.
     *
     * @param cSeq text of the dictionary entry
     * @param type chunk type of the entry
     * @throws IllegalArgumentException if the entry contains no tokens
     */
    public void trainDictionary(CharSequence cSeq, String type) {
        char[] cs = Strings.toCharArray(cSeq);
        Tokenizer tokenizer = getTokenizerFactory().tokenizer(cs, 0, cs.length);
        String[] tokens = tokenizer.tokenize();
        if (tokens.length < 1) {
            String msg = "Did not find any tokens in entry.Char sequence=" + cSeq;
            throw new IllegalArgumentException(msg);
        }
        AbstractHmmEstimator estimator = getHmmEstimator();
        SymbolTable table = estimator.stateSymbolTable();
        smoothBaseTag(type, table, estimator);
        if (tokens.length == 1) {
            estimator.trainEmit(WHOLE_PREFIX + type, tokens[0]);
            return;
        }
        int lastIndex = tokens.length - 1;
        String prevTag = BEGIN_PREFIX + type;
        estimator.trainEmit(prevTag, tokens[0]);
        String middleTag = MIDDLE_PREFIX + type;
        for (int i = 1; i < lastIndex; ++i) {
            estimator.trainEmit(middleTag, tokens[i]);
            estimator.trainTransit(prevTag, middleTag);
            prevTag = middleTag;
        }
        String finalTag = END_PREFIX + type;
        estimator.trainEmit(finalTag, tokens[lastIndex]);
        estimator.trainTransit(prevTag, finalTag);
    }

    /**
     * Trains the estimator on a chunking.
     *
     * <p>The chunks are sorted with {@link Chunk#TEXT_ORDER_COMPARATOR}; the
     * text before, between and after them is tokenized and tagged by
     * {@link #outTag}, and the text of each chunk by {@link #chunkTag}.  The
     * resulting parallel arrays are handed to
     * {@link #handle(String[],String[],String[])}.
     *
     * <p>NOTE(review): chunks are assumed not to overlap; an overlapping
     * chunk would cause a negative-length span to be passed to the tokenizer
     * factory.
     *
     * @param chunking training chunking
     * @throws IllegalArgumentException if a chunk's span contains no tokens
     */
    @Override
    public void handle(Chunking chunking) {
        CharSequence cSeq = chunking.charSequence();
        char[] cs = Strings.toCharArray(cSeq);
        Chunk[] chunks = chunking.chunkSet().toArray(EMPTY_CHUNK_ARRAY);
        Arrays.sort(chunks, Chunk.TEXT_ORDER_COMPARATOR);
        List<String> tokenList = new ArrayList<String>();
        List<String> whiteList = new ArrayList<String>();
        List<String> tagList = new ArrayList<String>();
        int pos = 0;
        for (Chunk chunk : chunks) {
            int start = chunk.start();
            int end = chunk.end();
            // Text between the previous chunk (or the start) and this chunk.
            outTag(cs, pos, start, tokenList, whiteList, tagList, mTokenizerFactory);
            chunkTag(cs, start, end, chunk.type(), tokenList, whiteList, tagList, mTokenizerFactory);
            pos = end;
        }
        // Text after the last chunk.
        outTag(cs, pos, cSeq.length(), tokenList, whiteList, tagList, mTokenizerFactory);
        String[] toks = tokenList.toArray(Strings.EMPTY_STRING_ARRAY);
        String[] whites = whiteList.toArray(Strings.EMPTY_STRING_ARRAY);
        String[] tags = tagList.toArray(Strings.EMPTY_STRING_ARRAY);
        handle(toks, whites, tags);
    }

    /**
     * Trains the estimator on parallel token and tag arrays.  The tags are
     * normalized with {@code trainNormalize} before being passed to the
     * estimator; the original, un-normalized tags are then passed to
     * {@link #smoothTags(String[])}.
     *
     * @param tokens tokens of the training example
     * @param whitespaces whitespaces around the tokens; not used by this method
     * @param tags one tag per token
     */
    void handle(String[] tokens, String[] whitespaces, String[] tags) {
        List<String> tokenList = Arrays.asList(tokens);
        List<String> normalizedTagList = Arrays.asList(CharLmHmmChunker.trainNormalize(tags));
        Tagging<String> tagging = new Tagging<String>(tokenList, normalizedTagList);
        getHmmEstimator().handle(tagging);
        smoothTags(tags);
    }

    /**
     * Writes a compiled form of this chunker to the given output.  Reading
     * the written object back yields an {@link HmmChunker}.
     *
     * @param objOut output to which the chunker is written
     * @throws IOException if writing fails
     */
    @Override
    public void compileTo(ObjectOutput objOut) throws IOException {
        objOut.writeObject(new Externalizer(this));
    }

    /**
     * Returns a multi-line dump of the estimator's start, end and transition
     * log (base 2) probabilities over the boundary states and every state
     * derived from the base tags seen so far, in sorted tag order.
     *
     * @return textual dump of the model's start, end and transition estimates
     */
    @Override
    public String toString() {
        Set<String> expandedTagSet = new TreeSet<String>();
        expandedTagSet.add(OUT_MIDDLE_TAG);
        expandedTagSet.add(OUT_WHOLE_BOUNDARY_TAG);
        expandedTagSet.add(OUT_BEGIN_BOUNDARY_TAG);
        expandedTagSet.add(OUT_END_BOUNDARY_TAG);
        for (String type : mTagSet) {
            expandedTagSet.add(BEGIN_PREFIX + type);
            expandedTagSet.add(MIDDLE_PREFIX + type);
            expandedTagSet.add(END_PREFIX + type);
            expandedTagSet.add(WHOLE_PREFIX + type);
            expandedTagSet.add(OUT_BEGIN_PREFIX + type);
            expandedTagSet.add(OUT_END_PREFIX + type);
            expandedTagSet.add(OUT_WHOLE_PREFIX + type);
        }
        StringBuilder sb = new StringBuilder();
        for (String tag0 : expandedTagSet) {
            sb.append('\n');
            sb.append("start(").append(tag0).append(")=").append(mHmmEstimator.startLog2Prob(tag0));
            sb.append('\n');
            sb.append("  end(").append(tag0).append(")=").append(mHmmEstimator.endLog2Prob(tag0));
            sb.append('\n');
            for (String tag1 : expandedTagSet) {
                sb.append("trans(").append(tag0).append(',').append(tag1).append(")=")
                  .append(mHmmEstimator.transitLog2Prob(tag0, tag1));
                sb.append('\n');
            }
        }
        return sb.toString();
    }

    /**
     * Registers the four type-independent non-chunk states with the
     * estimator's state symbol table and trains the start, end and transition
     * events that connect them.  Called once from the constructor.
     */
    void smoothBoundaries() {
        AbstractHmmEstimator hmmEstimator = getHmmEstimator();
        SymbolTable table = hmmEstimator.stateSymbolTable();
        // Registration order is kept stable because it determines symbol ids.
        table.getOrAddSymbol(OUT_BEGIN_BOUNDARY_TAG);
        table.getOrAddSymbol(OUT_MIDDLE_TAG);
        table.getOrAddSymbol(OUT_END_BOUNDARY_TAG);
        table.getOrAddSymbol(OUT_WHOLE_BOUNDARY_TAG);
        hmmEstimator.trainStart(OUT_BEGIN_BOUNDARY_TAG);
        hmmEstimator.trainStart(OUT_WHOLE_BOUNDARY_TAG);
        hmmEstimator.trainEnd(OUT_END_BOUNDARY_TAG);
        hmmEstimator.trainEnd(OUT_WHOLE_BOUNDARY_TAG);
        hmmEstimator.trainTransit(OUT_BEGIN_BOUNDARY_TAG, OUT_MIDDLE_TAG);
        hmmEstimator.trainTransit(OUT_BEGIN_BOUNDARY_TAG, OUT_END_BOUNDARY_TAG);
        hmmEstimator.trainTransit(OUT_MIDDLE_TAG, OUT_MIDDLE_TAG);
        hmmEstimator.trainTransit(OUT_MIDDLE_TAG, OUT_END_BOUNDARY_TAG);
    }

    /**
     * Passes every tag through
     * {@link #smoothTag(String,SymbolTable,AbstractHmmEstimator)} if tag
     * smoothing was enabled at construction time; otherwise does nothing.
     *
     * @param tags tags of a training example
     */
    void smoothTags(String[] tags) {
        if (!mSmoothTags) {
            return;
        }
        AbstractHmmEstimator hmmEstimator = getHmmEstimator();
        SymbolTable table = hmmEstimator.stateSymbolTable();
        for (String tag : tags) {
            smoothTag(tag, table, hmmEstimator);
        }
    }

    /**
     * Smooths the base tag of the given tag, as computed by
     * {@code HmmChunker.baseTag}.
     *
     * @param tag tag whose base tag is smoothed
     * @param table state symbol table of the estimator
     * @param hmmEstimator estimator to train
     */
    void smoothTag(String tag, SymbolTable table, AbstractHmmEstimator hmmEstimator) {
        smoothBaseTag(HmmChunker.baseTag(tag), table, hmmEstimator);
    }

    /**
     * The first time a base tag is seen, registers its seven derived states
     * with the symbol table and calls the estimator's training methods for
     * the start, end and transition events involving those states, including
     * transitions to and from the states of every base tag seen so far (the
     * new tag itself included).  Later calls with the same base tag, and any
     * call with the base tag {@code "O"}, train nothing.
     *
     * @param baseTag chunk type to smooth
     * @param table state symbol table of the estimator
     * @param hmmEstimator estimator to train
     */
    void smoothBaseTag(String baseTag, SymbolTable table, AbstractHmmEstimator hmmEstimator) {
        // The tag is recorded even when it is "O", so that it is skipped quickly next time.
        boolean firstSighting = mTagSet.add(baseTag);
        if (!firstSighting || OUT_TYPE.equals(baseTag)) {
            return;
        }
        String b_x = BEGIN_PREFIX + baseTag;
        String m_x = MIDDLE_PREFIX + baseTag;
        String e_x = END_PREFIX + baseTag;
        String w_x = WHOLE_PREFIX + baseTag;
        String bb_o_x = OUT_BEGIN_PREFIX + baseTag;
        String ee_o_x = OUT_END_PREFIX + baseTag;
        String ww_o_x = OUT_WHOLE_PREFIX + baseTag;

        // Registration order is kept stable because it determines symbol ids.
        table.getOrAddSymbol(b_x);
        table.getOrAddSymbol(m_x);
        table.getOrAddSymbol(e_x);
        table.getOrAddSymbol(w_x);
        table.getOrAddSymbol(bb_o_x);
        table.getOrAddSymbol(ee_o_x);
        table.getOrAddSymbol(ww_o_x);

        // Events inside a multi-token chunk of this type.
        hmmEstimator.trainStart(b_x);
        hmmEstimator.trainTransit(b_x, m_x);
        hmmEstimator.trainTransit(b_x, e_x);
        hmmEstimator.trainTransit(m_x, m_x);
        hmmEstimator.trainTransit(m_x, e_x);
        hmmEstimator.trainEnd(e_x);
        hmmEstimator.trainTransit(e_x, bb_o_x);

        // Events for a single-token chunk of this type.
        hmmEstimator.trainStart(w_x);
        hmmEstimator.trainEnd(w_x);
        hmmEstimator.trainTransit(w_x, bb_o_x);

        // Events linking this type's non-chunk states to its chunk states.
        hmmEstimator.trainTransit(bb_o_x, OUT_MIDDLE_TAG);
        hmmEstimator.trainTransit(OUT_MIDDLE_TAG, ee_o_x);
        hmmEstimator.trainTransit(ee_o_x, b_x);
        hmmEstimator.trainTransit(ee_o_x, w_x);
        hmmEstimator.trainStart(ww_o_x);
        hmmEstimator.trainTransit(ww_o_x, b_x);
        hmmEstimator.trainTransit(ww_o_x, w_x);

        // Events linking this type to the boundary states.
        hmmEstimator.trainTransit(e_x, OUT_WHOLE_BOUNDARY_TAG);
        hmmEstimator.trainTransit(w_x, OUT_WHOLE_BOUNDARY_TAG);
        hmmEstimator.trainTransit(bb_o_x, OUT_END_BOUNDARY_TAG);
        hmmEstimator.trainTransit(OUT_BEGIN_BOUNDARY_TAG, ee_o_x);

        // Cross-type events against every known type; mTagSet already
        // contains baseTag, so same-type adjacency is covered here as well.
        for (String type : mTagSet) {
            if (OUT_TYPE.equals(type) || BOUNDARY_TYPE.equals(type)) {
                continue;
            }
            String bb_o_y = OUT_BEGIN_PREFIX + type;
            String ww_o_y = OUT_WHOLE_PREFIX + type;
            String ee_o_y = OUT_END_PREFIX + type;
            String b_y = BEGIN_PREFIX + type;
            String w_y = WHOLE_PREFIX + type;
            String e_y = END_PREFIX + type;
            hmmEstimator.trainTransit(e_x, ww_o_y);
            hmmEstimator.trainTransit(e_x, b_y);
            hmmEstimator.trainTransit(e_x, w_y);
            hmmEstimator.trainTransit(w_x, ww_o_y);
            hmmEstimator.trainTransit(w_x, b_y);
            hmmEstimator.trainTransit(w_x, w_y);
            hmmEstimator.trainTransit(e_y, b_x);
            hmmEstimator.trainTransit(e_y, w_x);
            hmmEstimator.trainTransit(e_y, ww_o_x);
            hmmEstimator.trainTransit(w_y, b_x);
            hmmEstimator.trainTransit(w_y, w_x);
            hmmEstimator.trainTransit(w_y, ww_o_x);
            hmmEstimator.trainTransit(bb_o_x, ee_o_y);
            hmmEstimator.trainTransit(bb_o_y, ee_o_x);
        }
    }

    /**
     * Tokenizes the span {@code [start,end)} of non-chunk text and appends
     * its tokens, each tagged with {@code ChunkTagHandlerAdapter2.OUT_TAG},
     * to the lists.  One whitespace is appended before the first token and
     * one after every token, so a span with {@code n} tokens contributes
     * {@code n + 1} whitespaces.
     *
     * @param cs underlying characters
     * @param start index of the first character of the span
     * @param end index one past the last character of the span
     * @param tokenList list to which tokens are appended
     * @param whiteList list to which whitespaces are appended
     * @param tagList list to which tags are appended
     * @param factory tokenizer factory used to tokenize the span
     */
    static void outTag(char[] cs, int start, int end, List<String> tokenList, List<String> whiteList, List<String> tagList, TokenizerFactory factory) {
        Tokenizer tokenizer = factory.tokenizer(cs, start, end - start);
        whiteList.add(tokenizer.nextWhitespace());
        String token;
        while ((token = tokenizer.nextToken()) != null) {
            tokenList.add(token);
            tagList.add(ChunkTagHandlerAdapter2.OUT_TAG);
            whiteList.add(tokenizer.nextWhitespace());
        }
    }

    /**
     * Tokenizes the span {@code [start,end)} of a chunk and appends its
     * tokens and tags to the lists.  The first token is tagged with
     * {@code ChunkTagHandlerAdapter2.BEGIN_TAG_PREFIX + type} and every later
     * token with {@code ChunkTagHandlerAdapter2.IN_TAG_PREFIX + type}.  Only
     * the whitespaces between tokens of the chunk are appended, so a chunk
     * with {@code m} tokens contributes {@code m - 1} whitespaces.
     *
     * @param cs underlying characters
     * @param start index of the first character of the chunk
     * @param end index one past the last character of the chunk
     * @param type type of the chunk
     * @param tokenList list to which tokens are appended
     * @param whiteList list to which whitespaces are appended
     * @param tagList list to which tags are appended
     * @param factory tokenizer factory used to tokenize the span
     * @throws IllegalArgumentException if the span contains no tokens; without
     *     this check a {@code null} token would be added to the training data
     */
    static void chunkTag(char[] cs, int start, int end, String type, List<String> tokenList, List<String> whiteList, List<String> tagList, TokenizerFactory factory) {
        Tokenizer tokenizer = factory.tokenizer(cs, start, end - start);
        String firstToken = tokenizer.nextToken();
        if (firstToken == null) {
            String msg = "Chunk contains no tokens."
                + " Type=" + type
                + " start=" + start
                + " end=" + end;
            throw new IllegalArgumentException(msg);
        }
        tokenList.add(firstToken);
        tagList.add(ChunkTagHandlerAdapter2.BEGIN_TAG_PREFIX + type);
        while (true) {
            // The whitespace is only recorded if another token follows it.
            String nextWhitespace = tokenizer.nextWhitespace();
            String nextToken = tokenizer.nextToken();
            if (nextToken == null) {
                break;
            }
            tokenList.add(nextToken);
            whiteList.add(nextWhitespace);
            tagList.add(ChunkTagHandlerAdapter2.IN_TAG_PREFIX + type);
        }
    }

    /**
     * Tests whether re-tokenizing the text formed by interleaving the given
     * whitespaces and tokens reproduces exactly those whitespaces and tokens.
     *
     * @param toks tokens
     * @param whitespaces whitespaces; must be one longer than {@code toks}
     * @param tokenizerFactory factory used to re-tokenize the text
     * @return {@code true} if the array lengths are compatible and every
     *     leading token and whitespace produced by the tokenizer equals the
     *     corresponding array element
     */
    public static boolean consistentTokens(String[] toks, String[] whitespaces, TokenizerFactory tokenizerFactory) {
        if (toks.length + 1 != whitespaces.length) {
            return false;
        }
        char[] cs = CharLmHmmChunker.getChars(toks, whitespaces);
        Tokenizer tokenizer = tokenizerFactory.tokenizer(cs, 0, cs.length);
        if (!whitespaces[0].equals(tokenizer.nextWhitespace())) {
            return false;
        }
        for (int i = 0; i < toks.length; ++i) {
            String token = tokenizer.nextToken();
            if (token == null || !toks[i].equals(token)) {
                return false;
            }
            if (!whitespaces[i + 1].equals(tokenizer.nextWhitespace())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the tokens produced by this chunker's tokenizer factory for the
     * text formed by interleaving the given whitespaces and tokens.
     *
     * @param toks tokens
     * @param whitespaces whitespaces surrounding the tokens
     * @return tokens found by re-tokenizing the reconstructed text
     */
    List<String> tokenization(String[] toks, String[] whitespaces) {
        List<String> tokList = new ArrayList<String>();
        List<String> whiteList = new ArrayList<String>();
        char[] cs = CharLmHmmChunker.getChars(toks, whitespaces);
        Tokenizer tokenizer = mTokenizerFactory.tokenizer(cs, 0, cs.length);
        tokenizer.tokenize(tokList, whiteList);
        return tokList;
    }

    /**
     * Rebuilds text by emitting {@code whitespaces[i]} followed by
     * {@code toks[i]} for each token, and finally the last element of
     * {@code whitespaces}.
     *
     * @param toks tokens
     * @param whitespaces whitespaces; must be non-empty and at least as long
     *     as {@code toks}, otherwise an array index exception is thrown
     * @return characters of the reconstructed text
     */
    static char[] getChars(String[] toks, String[] whitespaces) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < toks.length; ++i) {
            sb.append(whitespaces[i]).append(toks[i]);
        }
        sb.append(whitespaces[whitespaces.length - 1]);
        return Strings.toCharArray(sb);
    }

    /**
     * Serialization proxy for a {@link CharLmHmmChunker}.  It writes the
     * chunker's tokenizer factory and estimator using
     * {@code AbstractExternalizable.compileOrSerialize}, and on reading
     * builds an {@link HmmChunker} from the tokenizer factory and hidden
     * Markov model that were written.
     */
    static class Externalizer extends AbstractExternalizable {
        private static final long serialVersionUID = 4630707998932521821L;

        /** Chunker to write; {@code null} when created for deserialization. */
        final CharLmHmmChunker mChunker;

        /** No-argument constructor used during deserialization. */
        public Externalizer() {
            this(null);
        }

        /**
         * Constructs an externalizer that writes the given chunker.
         *
         * @param chunker chunker to write
         */
        public Externalizer(CharLmHmmChunker chunker) {
            mChunker = chunker;
        }

        /**
         * Reads a tokenizer factory followed by a hidden Markov model and
         * returns an {@link HmmChunker} built from them.
         *
         * @param in input from which the two objects are read
         * @return the reconstructed chunker
         * @throws ClassNotFoundException if a class of a read object cannot be found
         * @throws IOException if reading fails
         */
        @Override
        public Object read(ObjectInput in) throws ClassNotFoundException, IOException {
            TokenizerFactory tokenizerFactory = (TokenizerFactory) in.readObject();
            HiddenMarkovModel hmm = (HiddenMarkovModel) in.readObject();
            return new HmmChunker(tokenizerFactory, new HmmDecoder(hmm));
        }

        /**
         * Writes the chunker's tokenizer factory followed by its estimator.
         *
         * @param objOut output to which the two objects are written
         * @throws IOException if writing fails
         */
        @Override
        public void writeExternal(ObjectOutput objOut) throws IOException {
            AbstractExternalizable.compileOrSerialize(mChunker.getTokenizerFactory(), objOut);
            AbstractExternalizable.compileOrSerialize(mChunker.getHmmEstimator(), objOut);
        }
    }
}

