/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.model.crf;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.model.crf.CRFTagger;
import com.hankcs.hanlp.model.crf.crfpp.TaggerImpl;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

/**
 * Character-tagging word segmenter built on {@link CRFTagger}.
 *
 * <p>Each character is labelled with one of four tags: {@code B} (first
 * character of a multi-character word), {@code M} (interior character),
 * {@code E} (last character) or {@code S} (single-character word). Words are
 * recovered from a tag sequence by starting a new word at every {@code B} or
 * {@code S}.
 */
public class CRFSegmenter
extends CRFTagger
implements Segmenter {
    /** Column separator written between a character and its tag. */
    private static final char COLUMN_SEPARATOR = '\t';
    /** Terminator written after every character/tag row. */
    private static final char ROW_TERMINATOR = '\n';
    /** Tag of the first character of a multi-character word. */
    private static final char TAG_BEGIN = 'B';
    /** Tag of an interior character of a multi-character word. */
    private static final char TAG_MIDDLE = 'M';
    /** Tag of the last character of a multi-character word. */
    private static final char TAG_END = 'E';
    /** Tag of a word that consists of a single character. */
    private static final char TAG_SINGLE = 'S';

    /**
     * Creates a segmenter from the model path configured in
     * {@code HanLP.Config.CRFCWSModelPath}.
     *
     * @throws IOException propagated from the superclass constructor
     */
    public CRFSegmenter() throws IOException {
        this(HanLP.Config.CRFCWSModelPath);
    }

    /**
     * Creates a segmenter from the given model path.
     *
     * @param modelPath path handed to the {@link CRFTagger} constructor
     * @throws IOException propagated from the superclass constructor
     */
    public CRFSegmenter(String modelPath) throws IOException {
        super(modelPath);
    }

    /**
     * Writes one sentence as training rows, one row per character, in the form
     * {@code <char>\t<tag>\n}.
     *
     * <p>Every word is passed through {@link CharTable#convert} first. Words
     * that are empty after conversion are skipped, since they contribute no
     * characters and would otherwise fail on {@code charAt(0)}.
     *
     * @param sentence sentence whose simple word list is written
     * @param bw       destination writer; it is neither flushed nor closed here
     * @throws IOException if writing to {@code bw} fails
     */
    @Override
    protected void convertCorpus(Sentence sentence, BufferedWriter bw) throws IOException {
        for (Word w : sentence.toSimpleWordList()) {
            String word = CharTable.convert(w.value);
            int length = word.length();
            if (length == 0) {
                // Nothing to tag; the unguarded code threw StringIndexOutOfBoundsException here.
                continue;
            }
            if (length == 1) {
                writeTaggedChar(bw, word.charAt(0), TAG_SINGLE);
                continue;
            }
            int last = length - 1;
            writeTaggedChar(bw, word.charAt(0), TAG_BEGIN);
            for (int i = 1; i < last; ++i) {
                writeTaggedChar(bw, word.charAt(i), TAG_MIDDLE);
            }
            writeTaggedChar(bw, word.charAt(last), TAG_END);
        }
    }

    /** Writes a single {@code <char>\t<tag>\n} row. */
    private static void writeTaggedChar(BufferedWriter bw, char ch, char tag) throws IOException {
        bw.write(ch);
        bw.write(COLUMN_SEPARATOR);
        bw.write(tag);
        bw.write(ROW_TERMINATOR);
    }

    /**
     * Segments {@code text} into words.
     *
     * @param text text to segment
     * @return the words in order; empty when {@code text} is empty or when the
     *         tagger fails to parse
     */
    @Override
    public List<String> segment(String text) {
        List<String> wordList = new LinkedList<String>();
        this.segment(text, CharTable.convert(text), wordList);
        return wordList;
    }

    /**
     * Segments {@code text} and appends the resulting words to {@code wordList}.
     *
     * <p>The tagger is fed one row per character of {@code text}, each
     * converted with {@link CharTable#convert}. The emitted words are built
     * from the original characters of {@code text}, not the converted ones.
     * Nothing is appended when {@code text} is empty or when
     * {@code tagger.parse()} returns {@code false}.
     *
     * @param text       text to segment
     * @param normalized normalized form of {@code text}
     * @param wordList   list that receives the words, in order
     */
    @Override
    public void segment(String text, String normalized, List<String> wordList) {
        if (text.isEmpty()) {
            return;
        }
        TaggerImpl tagger = this.createTagger();
        // NOTE(review): `normalized` is not read here; each character of `text` is
        // converted again instead. Confirm whether callers expect `normalized` to be used.
        for (int i = 0; i < text.length(); ++i) {
            tagger.add(new String[]{String.valueOf(CharTable.convert(text.charAt(i)))});
        }
        if (!tagger.parse()) {
            return;
        }
        StringBuilder current = new StringBuilder();
        // The first character always opens the first word, whatever its tag is.
        current.append(text.charAt(0));
        for (int i = 1; i < tagger.size(); ++i) {
            char tag = tagger.yname(tagger.y(i)).charAt(0);
            if (tag == TAG_BEGIN || tag == TAG_SINGLE) {
                // A B or S tag starts a new word, so flush the one collected so far.
                wordList.add(current.toString());
                current.setLength(0);
            }
            current.append(text.charAt(i));
        }
        if (current.length() != 0) {
            wordList.add(current.toString());
        }
    }

    /**
     * Returns the default feature template: unigram features over the
     * previous, current and next character (alone and in pairs) plus the
     * bigram feature {@code B}.
     *
     * @return the template text
     */
    @Override
    protected String getDefaultFeatureTemplate() {
        return "# Unigram\nU0:%x[-1,0]\nU1:%x[0,0]\nU2:%x[1,0]\nU3:%x[-1,0]%x[0,0]\nU4:%x[0,0]%x[1,0]\nU5:%x[-1,0]%x[1,0]\n\n# Bigram\nB";
    }
}

