/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.spell;

import com.aliasi.corpus.ObjectHandler;
import com.aliasi.spell.TokenizedDistance;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Counter;
import com.aliasi.util.ObjectToCounterMap;
import com.aliasi.util.Strings;
import java.util.Collections;
import java.util.Map;
import java.util.Set;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A distance/proximity measure over character sequences that compares their
 * term-frequency vectors after weighting each term by an inverse document
 * frequency learned from training documents.
 *
 * <p>Training happens through {@link #handle(CharSequence)}: each call counts
 * as one document and bumps the document frequency of every token in that
 * document's token set. Proximity is the cosine of the two weighted vectors,
 * where the weight of a term is {@code sqrt(tf * idf)} and
 * {@code idf = ln(numDocuments / docFrequency)}.
 *
 * <p>This class performs no synchronization of its own; training mutates the
 * document count and the document-frequency map.
 */
public class TfIdfDistance
extends TokenizedDistance
implements ObjectHandler<CharSequence> {
    /** Number of training documents seen so far via {@link #handle(CharSequence)}. */
    private int mDocCount = 0;

    /** For each term, the number of training documents whose token set contained it. */
    private final ObjectToCounterMap<String> mDocFrequency = new ObjectToCounterMap<String>();

    /**
     * Constructs an untrained distance that tokenizes with the given factory.
     *
     * @param tokenizerFactory factory passed through to the superclass
     */
    public TfIdfDistance(TokenizerFactory tokenizerFactory) {
        super(tokenizerFactory);
    }

    /**
     * Trains on one document: increments the document frequency of every
     * token in the document's token set, then increments the document count.
     *
     * @param cSeq the training document
     */
    @Override
    public void handle(CharSequence cSeq) {
        char[] cs = Strings.toCharArray(cSeq);
        // tokenSet yields a Set, so a term is counted once per document here.
        for (String token : this.tokenSet(cs, 0, cs.length)) {
            this.mDocFrequency.increment(token);
        }
        ++this.mDocCount;
    }

    /**
     * Returns one minus the proximity of the two sequences.
     *
     * @param cSeq1 first sequence
     * @param cSeq2 second sequence
     * @return {@code 1.0 - proximity(cSeq1, cSeq2)}
     */
    @Override
    public double distance(CharSequence cSeq1, CharSequence cSeq2) {
        return 1.0 - this.proximity(cSeq1, cSeq2);
    }

    /**
     * Returns the cosine of the TF-IDF weighted term vectors of the two
     * sequences, clamped to the range {@code [0.0, 1.0]}.
     *
     * <p>If both weighted vectors have zero length the result is {@code 1.0};
     * if exactly one has zero length the result is {@code 0.0}. A vector has
     * zero length when every one of its terms has an IDF of zero, which
     * includes terms never seen in training.
     *
     * @param cSeq1 first sequence
     * @param cSeq2 second sequence
     * @return the clamped cosine proximity
     */
    @Override
    public double proximity(CharSequence cSeq1, CharSequence cSeq2) {
        ObjectToCounterMap<String> tf1 = this.termFrequencyVector(cSeq1);
        ObjectToCounterMap<String> tf2 = this.termFrequencyVector(cSeq2);
        double len1 = 0.0;
        double len2 = 0.0;
        double prod = 0.0;
        for (Map.Entry<String, Counter> entry : tf1.entrySet()) {
            String term = entry.getKey();
            double tfIdf1 = this.tfIdf(term, entry.getValue());
            len1 += tfIdf1 * tfIdf1;
            // Removing shared terms from tf2 means the second loop below only
            // visits terms that occur in cSeq2 alone.
            Counter count2 = tf2.remove(term);
            if (count2 == null) {
                continue;
            }
            double tfIdf2 = this.tfIdf(term, count2);
            len2 += tfIdf2 * tfIdf2;
            prod += tfIdf1 * tfIdf2;
        }
        // Remaining entries contribute to the second vector's length only.
        for (Map.Entry<String, Counter> entry : tf2.entrySet()) {
            double tfIdf2 = this.tfIdf(entry.getKey(), entry.getValue());
            len2 += tfIdf2 * tfIdf2;
        }
        if (len1 == 0.0) {
            return len2 == 0.0 ? 1.0 : 0.0;
        }
        if (len2 == 0.0) {
            return 0.0;
        }
        double prox = prod / Math.sqrt(len1 * len2);
        // Clamp to guard against results drifting just outside [0, 1].
        return prox < 0.0 ? 0.0 : (prox > 1.0 ? 1.0 : prox);
    }

    /**
     * Returns the document-frequency count recorded for the given term.
     *
     * @param term the term to look up
     * @return the count stored for {@code term} in the document-frequency map
     */
    public int docFrequency(String term) {
        return this.mDocFrequency.getCount(term);
    }

    /**
     * Returns the inverse document frequency of the given term.
     *
     * @param term the term to look up
     * @return {@code 0.0} if the term's document frequency is zero; otherwise
     *         the natural log of the document count divided by the term's
     *         document frequency
     */
    public double idf(String term) {
        int df = this.mDocFrequency.getCount(term);
        if (df == 0) {
            // Avoids division by zero for terms with no recorded documents.
            return 0.0;
        }
        return Math.log((double)this.mDocCount / (double)df);
    }

    /**
     * Returns the number of training documents handled so far.
     *
     * @return the document count
     */
    public int numDocuments() {
        return this.mDocCount;
    }

    /**
     * Returns the number of entries in the document-frequency map.
     *
     * @return the size of the document-frequency map
     */
    public int numTerms() {
        return this.mDocFrequency.size();
    }

    /**
     * Returns an unmodifiable view of the terms in the document-frequency map.
     *
     * @return unmodifiable view of the map's key set
     */
    public Set<String> termSet() {
        return Collections.unmodifiableSet(this.mDocFrequency.keySet());
    }

    /**
     * Computes the weight of a term as the square root of its count times its
     * inverse document frequency.
     *
     * @param term the term being weighted
     * @param count the term's count in the sequence being scored
     * @return {@code sqrt(count * idf(term))}
     */
    double tfIdf(String term, Counter count) {
        double idf = this.idf(term);
        double tf = count.doubleValue();
        return Math.sqrt(tf * idf);
    }
}

