/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.corpus.dependency.model;

import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLLoader;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord;
import com.hankcs.hanlp.corpus.io.IOUtil;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collection;
import java.util.LinkedList;

/**
 * Converts a CoNLL dependency corpus into a plain-text training file in which
 * every line describes one ordered word pair of a sentence: a list of
 * space-separated context features followed by the corresponding entry of the
 * sentence's edge array.
 */
public class MaxEntDependencyModelMaker {
    /** Number of neighbouring positions inspected on each side of a word. */
    private static final int WINDOW = 2;

    /** Separator placed between the two words (or tags) of a pair feature. */
    private static final char ARROW = '\u2192';

    /**
     * Reads the corpus at {@code corpusLoadPath} and writes one training line per
     * ordered pair {@code (i, j)}, {@code i != j}, of every sentence to
     * {@code modelSavePath}. Progress is reported on standard output.
     *
     * @param corpusLoadPath path of the CoNLL corpus to read
     * @param modelSavePath  path of the training file to write
     * @return always {@code true}; failures are reported through exceptions
     * @throws IOException if the output cannot be opened, written or closed
     */
    public static boolean makeModel(String corpusLoadPath, String modelSavePath) throws IOException {
        // Load the input first so a failing load never touches the output file.
        LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList(corpusLoadPath);
        int total = sentenceList.size();
        // Explicit UTF-8: the features contain non-ASCII text, so the file must not
        // depend on the platform default charset.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(IOUtil.newOutputStream(modelSavePath), "UTF-8"));
        try {
            int id = 1;
            for (CoNLLSentence sentence : sentenceList) {
                System.out.printf("%d / %d...", id++, total);
                MaxEntDependencyModelMaker.writeSentence(bw, sentence);
                System.out.println("done.");
            }
        } finally {
            // Release the file handle even when writing fails half-way.
            bw.close();
        }
        return true;
    }

    /**
     * Writes the training lines of a single sentence: for every ordered pair of
     * distinct positions, the features of both words and of the pair, then the
     * matching edge-array entry.
     */
    private static void writeSentence(BufferedWriter bw, CoNLLSentence sentence) throws IOException {
        String[][] edgeArray = sentence.getEdgeArray();
        CoNLLWord[] word = sentence.getWordArrayWithRoot();
        for (int i = 0; i < word.length; ++i) {
            for (int j = 0; j < word.length; ++j) {
                if (i == j) {
                    continue;
                }
                LinkedList<String> contextList = new LinkedList<String>();
                contextList.addAll(MaxEntDependencyModelMaker.generateSingleWordContext(word, i, "i"));
                contextList.addAll(MaxEntDependencyModelMaker.generateSingleWordContext(word, j, "j"));
                contextList.addAll(MaxEntDependencyModelMaker.generateUniContext(word, i, j));
                for (String feature : contextList) {
                    bw.write(feature);
                    bw.write(' ');
                }
                // String.valueOf keeps the original concatenation semantics: a null
                // entry is written as the literal text "null".
                bw.write(String.valueOf(edgeArray[i][j]));
                bw.newLine();
            }
        }
    }

    /**
     * Builds the features of the window centred on {@code index}. For each offset
     * from {@code -2} to {@code +2} two features are produced, in this order:
     * {@code NAME + mark + offset} and {@code POSTAG + mark + offset}. Positions
     * that fall outside the array are represented by {@code CoNLLWord.NULL}.
     *
     * @param word  words of the sentence
     * @param index centre position of the window
     * @param mark  tag inserted between the value and the offset
     * @return the features, ordered by increasing offset
     */
    public static Collection<String> generateSingleWordContext(CoNLLWord[] word, int index, String mark) {
        LinkedList<String> context = new LinkedList<String>();
        for (int i = index - WINDOW; i <= index + WINDOW; ++i) {
            CoNLLWord w = i >= 0 && i < word.length ? word[i] : CoNLLWord.NULL;
            context.add(w.NAME + mark + (i - index));
            context.add(w.POSTAG + mark + (i - index));
        }
        return context;
    }

    /**
     * Builds the features describing the pair {@code (word[i], word[j])}: the two
     * names and the two tags joined by an arrow, the same two features suffixed
     * with {@code i - j}, and four variants prefixed with the name or tag of the
     * word preceding {@code i} or {@code j} ({@code CoNLLWord.NULL} when the
     * position is {@code 0}).
     *
     * @param word words of the sentence
     * @param i    position of the first word; must be a valid index
     * @param j    position of the second word; must be a valid index
     * @return the eight pair features in a fixed order
     */
    public static Collection<String> generateUniContext(CoNLLWord[] word, int i, int j) {
        LinkedList<String> context = new LinkedList<String>();
        context.add(word[i].NAME + ARROW + word[j].NAME);
        context.add(word[i].POSTAG + ARROW + word[j].POSTAG);
        context.add(word[i].NAME + ARROW + word[j].NAME + (i - j));
        context.add(word[i].POSTAG + ARROW + word[j].POSTAG + (i - j));
        CoNLLWord wordBeforeI = i - 1 >= 0 ? word[i - 1] : CoNLLWord.NULL;
        CoNLLWord wordBeforeJ = j - 1 >= 0 ? word[j - 1] : CoNLLWord.NULL;
        context.add(wordBeforeI.NAME + '@' + word[i].NAME + ARROW + word[j].NAME);
        context.add(word[i].NAME + ARROW + wordBeforeJ.NAME + '@' + word[j].NAME);
        context.add(wordBeforeI.POSTAG + '@' + word[i].POSTAG + ARROW + word[j].POSTAG);
        context.add(word[i].POSTAG + ARROW + wordBeforeJ.POSTAG + '@' + word[j].POSTAG);
        return context;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the corpus path and {@code args[1]}
     *             the output path; the built-in defaults are used for whichever
     *             is missing
     * @throws IOException if the training file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String corpusLoadPath = "D:\\Doc\\\u8bed\u6599\u5e93\\\u4f9d\u5b58\u5206\u6790\u8bad\u7ec3\u6570\u636e\\THU\\train.conll.fixed.txt";
        String modelSavePath = "data/model/dependency/MaxEntTrain.txt";
        if (args != null && args.length > 0) {
            corpusLoadPath = args[0];
        }
        if (args != null && args.length > 1) {
            modelSavePath = args[1];
        }
        MaxEntDependencyModelMaker.makeModel(corpusLoadPath, modelSavePath);
    }
}

