/*
 * Decompiled with CFR 0.152.
 */
package kr.co.shineware.nlp.komoran.corpus.parser;

import java.util.ArrayList;
import java.util.List;
import kr.co.shineware.nlp.komoran.corpus.parser.model.ProblemAnswerPair;
import kr.co.shineware.nlp.komoran.exception.FileFormatException;
import kr.co.shineware.util.common.model.Pair;

/**
 * Parses single lines of a tagged corpus into {@link ProblemAnswerPair} objects.
 *
 * <p>A line is expected to have the shape {@code problem<TAB>answer}, where the answer
 * is a space-separated sequence of {@code word/tag} tokens. The tag is called
 * {@code pos} throughout this class (presumably a part-of-speech tag).
 */
public class CorpusParser {
    /** Separator between the problem field and the answer field of a corpus line. */
    private static final String PROBLEM_ANSWER_SPLITER = "\t";
    /** Separator between tokens inside the answer field. */
    private static final String ANSWER_SPLITER = " ";
    /** Regular expression (an escaped slash) separating a word from its tag inside a token. */
    private static final String WORD_POS_SPLITER = "\\/";
    /** Number of tab-separated fields a well-formed corpus line must split into. */
    private static final int CONTENTS_COUNT = 2;

    /**
     * Parses one corpus line.
     *
     * @param line a line of the form {@code problem<TAB>answer}; must not be {@code null}
     * @return a pair holding the problem text, the raw answer text, and the list of
     *         (word, tag) pairs extracted from the answer
     * @throws FileFormatException if the line does not split into exactly
     *         {@link #CONTENTS_COUNT} fields, or if a token of the answer yields a blank
     *         word or a blank tag
     */
    public ProblemAnswerPair parse(String line) throws FileFormatException {
        // Note: String.split drops trailing empty fields, so "problem\t" yields one field
        // and is rejected here, while "problem\tanswer\t" still yields two.
        String[] fields = line.split(PROBLEM_ANSWER_SPLITER);
        if (fields.length != CONTENTS_COUNT) {
            throw new FileFormatException("Corpus Format Error. " + line);
        }
        String problem = fields[0];
        String answer = fields[1];

        ArrayList<Pair<String, String>> answerList = new ArrayList<Pair<String, String>>();
        this.parseAnswer(answer, answerList);

        ProblemAnswerPair paPair = new ProblemAnswerPair();
        paPair.setProblem(problem);
        paPair.setAnswer(answer);
        paPair.setAnswerList(answerList);
        return paPair;
    }

    /**
     * Splits the answer field into (word, tag) pairs and appends them to {@code answerList}.
     *
     * <p>A token that does not split into at least two parts on the slash is treated as
     * the beginning of a word that itself contains spaces: it is buffered, followed by a
     * space, and prepended to the word of the next token that does carry a tag.
     *
     * @param answer     the answer field of a corpus line
     * @param answerList destination list; pairs are appended in token order
     * @throws FileFormatException if a token yields a blank word or a blank tag
     */
    private void parseAnswer(String answer, List<Pair<String, String>> answerList) throws FileFormatException {
        // Text of preceding tag-less tokens, waiting to be glued onto the next tagged word.
        StringBuilder pendingPrefix = new StringBuilder();

        for (String token : answer.trim().split(ANSWER_SPLITER)) {
            String[] wordPos = token.split(WORD_POS_SPLITER);

            // One part: the token has no slash, or only trailing slashes (split discards
            // trailing empty parts). Buffer it and move on.
            if (wordPos.length == 1) {
                pendingPrefix.append(token).append(ANSWER_SPLITER);
                continue;
            }

            String word;
            String pos;
            if (wordPos.length == 2) {
                word = wordPos[0];
                pos = wordPos[1];
            } else if (wordPos.length > 2) {
                // The word itself contains slashes: the tag is the last part and the word
                // is everything in the token before the final "/tag" suffix.
                pos = wordPos[wordPos.length - 1];
                word = token.substring(0, token.length() - pos.length() - 1);
            } else {
                // Zero parts: the token consisted only of slashes. Rejected just below.
                word = "";
                pos = "";
            }

            if (word.trim().isEmpty() || pos.trim().isEmpty()) {
                throw new FileFormatException("Corpus Format Error. " + answer);
            }

            answerList.add(new Pair<String, String>(pendingPrefix + word, pos));
            pendingPrefix.setLength(0);
        }
        // NOTE(review): anything still buffered in pendingPrefix here (tag-less tokens at
        // the end of the answer) is discarded without an error. Kept as-is because callers
        // may rely on this lenient behaviour; confirm whether it should be rejected.
    }
}

