/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.rules.ngrams;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.ResourceBundle;
import org.languagetool.AnalyzedSentence;
import org.languagetool.Experimental;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Category;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.ngrams.GoogleToken;
import org.languagetool.rules.ngrams.Probability;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Rule that flags three-token phrases (trigrams) whose pseudo-probability,
 * as reported by a {@link LanguageModel}, is below a configurable threshold.
 *
 * <p>The threshold defaults to {@code 1.0E-15} and can be changed with
 * {@link #setMinProbability(double)}.
 */
@Experimental
public class NgramProbabilityRule
extends Rule {
    /** Identifier returned by {@link #getId()}. */
    public static final String RULE_ID = "NGRAM_RULE";
    /** Debug switch; currently unused because {@link #debug(String, Object...)} is a no-op. */
    private static final boolean DEBUG = false;
    private final LanguageModel lm;
    private final Language language;
    /** Trigrams with a probability strictly below this value produce a match. */
    private double minProbability = 1.0E-15;

    /**
     * Creates the rule.
     *
     * @param messages      resource bundle; must contain the key {@code category_typo}
     * @param languageModel model queried for trigram pseudo-probabilities; must not be {@code null}
     * @param language      language whose word tokenizer is used by default; must not be {@code null}
     * @throws NullPointerException if {@code languageModel} or {@code language} is {@code null}
     */
    public NgramProbabilityRule(ResourceBundle messages, LanguageModel languageModel, Language language) {
        super(messages);
        this.setCategory(new Category(messages.getString("category_typo")));
        this.setLocQualityIssueType(ITSIssueType.NonConformance);
        this.lm = Objects.requireNonNull(languageModel);
        this.language = Objects.requireNonNull(language);
    }

    /** Returns {@link #RULE_ID}. */
    @Override
    public String getId() {
        return RULE_ID;
    }

    /**
     * Sets the probability threshold below which a trigram is reported.
     *
     * @param minProbability new threshold; no validation is performed
     */
    @Experimental
    public void setMinProbability(double minProbability) {
        this.minProbability = minProbability;
    }

    /**
     * Tokenizes the sentence text and reports every evaluated trigram whose
     * pseudo-probability is strictly below the configured minimum.
     *
     * <p>A trigram is only evaluated once two earlier tokens have been seen and
     * a following token exists, so the first evaluated trigram starts at the
     * second token of the list. NOTE(review): presumably the first token is a
     * sentence-start marker requested via the {@code true} argument to
     * {@code GoogleToken.getGoogleTokens} - confirm against that method.
     *
     * @param sentence sentence to check
     * @return one match per low-probability trigram, spanning from the start of
     *         the trigram's first token to the end of its last token; empty if none
     */
    @Override
    public RuleMatch[] match(AnalyzedSentence sentence) {
        String text = sentence.getText();
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(text, true, this.getGoogleStyleWordTokenizer());
        List<RuleMatch> matches = new ArrayList<RuleMatch>();
        GoogleToken prevPrevToken = null;
        GoogleToken prevToken = null;
        int i = 0;
        for (GoogleToken googleToken : tokens) {
            String token = googleToken.token;
            if (prevPrevToken != null && prevToken != null && i < tokens.size() - 1) {
                GoogleToken next = tokens.get(i + 1);
                Probability p = this.lm.getPseudoProbability(Arrays.asList(prevToken.token, token, next.token));
                double prob = p.getProb();
                if (prob < this.minProbability) {
                    // Build the phrase from the same token strings that were looked up,
                    // rather than relying on GoogleToken.toString() for the first word.
                    String ngram = prevToken.token + " " + token + " " + next.token;
                    String message = "The phrase '" + ngram + "' rarely occurs in the reference corpus (" + p.getOccurrences() + " times)";
                    matches.add(new RuleMatch(this, prevToken.startPos, next.endPos, message));
                }
            }
            prevPrevToken = prevToken;
            prevToken = googleToken;
            ++i;
        }
        return matches.toArray(new RuleMatch[0]);
    }

    /** Returns a fixed, human-readable description of this rule. */
    @Override
    public String getDescription() {
        return "Assume errors for phrases (ngrams) that occur rarely in a reference index";
    }

    /** No-op: this class declares no state that needs resetting. */
    @Override
    public void reset() {
    }

    /**
     * Returns the tokenizer used to split the sentence text before the
     * language-model lookup. This implementation uses the language's word
     * tokenizer; subclasses may override it.
     */
    protected Tokenizer getGoogleStyleWordTokenizer() {
        return this.language.getWordTokenizer();
    }

    /** Debug hook; intentionally does nothing. */
    private void debug(String message, Object ... vars) {
    }
}

