/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.chunking;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.chunking.ChunkTaggedToken;
import org.languagetool.chunking.Chunker;
import org.languagetool.chunking.EnglishChunkFilter;
import org.languagetool.tools.Tools;

public class EnglishChunker
implements Chunker {
    private static final String TOKENIZER_MODEL = "/en-token.bin";
    private static final String POS_TAGGER_MODEL = "/en-pos-maxent.bin";
    private static final String CHUNKER_MODEL = "/en-chunker.bin";
    private static TokenizerModel tokenModel;
    private static POSModel posModel;
    private static ChunkerModel chunkerModel;
    private final EnglishChunkFilter chunkFilter;

    public EnglishChunker() {
        try {
            if (tokenModel == null) {
                tokenModel = new TokenizerModel(Tools.getStream((String)TOKENIZER_MODEL));
            }
            if (posModel == null) {
                posModel = new POSModel(Tools.getStream((String)POS_TAGGER_MODEL));
            }
            if (chunkerModel == null) {
                chunkerModel = new ChunkerModel(Tools.getStream((String)CHUNKER_MODEL));
            }
            this.chunkFilter = new EnglishChunkFilter();
        }
        catch (IOException e) {
            throw new RuntimeException("Could not initialize English chunker", e);
        }
    }

    public void addChunkTags(List<AnalyzedTokenReadings> tokenReadings) {
        List<ChunkTaggedToken> origChunkTags = this.getChunkTagsForReadings(tokenReadings);
        List<ChunkTaggedToken> chunkTags = this.chunkFilter.filter(origChunkTags);
        this.assignChunksToReadings(chunkTags);
    }

    private List<ChunkTaggedToken> getChunkTagsForReadings(List<AnalyzedTokenReadings> tokenReadings) {
        String sentence = this.getSentence(tokenReadings);
        String[] tokens = this.tokenize(sentence);
        String[] posTags = this.posTag(tokens);
        String[] chunkTags = this.chunk(tokens, posTags);
        if (tokens.length != posTags.length || tokens.length != chunkTags.length) {
            throw new RuntimeException("Length of results must be the same: " + tokens.length + ", " + posTags.length + ", " + chunkTags.length);
        }
        return this.getTokensWithTokenReadings(tokenReadings, tokens, chunkTags);
    }

    String[] tokenize(String sentence) {
        TokenizerME tokenizer = new TokenizerME(tokenModel);
        String cleanString = sentence.replace('\u2019', '\'');
        return tokenizer.tokenize(cleanString);
    }

    private String[] posTag(String[] tokens) {
        POSTaggerME posTagger = new POSTaggerME(posModel);
        return posTagger.tag(tokens);
    }

    private String[] chunk(String[] tokens, String[] posTags) {
        ChunkerME chunker = new ChunkerME(chunkerModel);
        return chunker.chunk(tokens, posTags);
    }

    private List<ChunkTaggedToken> getTokensWithTokenReadings(List<AnalyzedTokenReadings> tokenReadings, String[] tokens, String[] chunkTags) {
        ArrayList<ChunkTaggedToken> result = new ArrayList<ChunkTaggedToken>();
        int i = 0;
        int pos = 0;
        for (String chunkTag : chunkTags) {
            int startPos = pos;
            int endPos = startPos + tokens[i].length();
            AnalyzedTokenReadings readings = this.getAnalyzedTokenReadingsFor(startPos, endPos, tokenReadings);
            result.add(new ChunkTaggedToken(tokens[i], Collections.singletonList(new ChunkTag(chunkTag)), readings));
            pos = endPos;
            ++i;
        }
        return result;
    }

    private void assignChunksToReadings(List<ChunkTaggedToken> chunkTaggedTokens) {
        for (ChunkTaggedToken taggedToken : chunkTaggedTokens) {
            AnalyzedTokenReadings readings = taggedToken.getReadings();
            if (readings == null) continue;
            readings.setChunkTags(taggedToken.getChunkTags());
        }
    }

    private String getSentence(List<AnalyzedTokenReadings> sentenceTokens) {
        StringBuilder sb = new StringBuilder();
        for (AnalyzedTokenReadings token : sentenceTokens) {
            sb.append(token.getToken());
        }
        return sb.toString();
    }

    @Nullable
    private AnalyzedTokenReadings getAnalyzedTokenReadingsFor(int startPos, int endPos, List<AnalyzedTokenReadings> tokenReadings) {
        int pos = 0;
        for (AnalyzedTokenReadings tokenReading : tokenReadings) {
            String token = tokenReading.getToken();
            if (token.trim().isEmpty()) continue;
            int tokenStart = pos;
            int tokenEnd = pos + token.length();
            if (tokenStart == startPos && tokenEnd == endPos) {
                return tokenReading;
            }
            pos = tokenEnd;
        }
        return null;
    }
}

