/*
 * Decompiled with CFR 0.152.
 */
package com.google.refine.clustering.binning;

import com.google.refine.clustering.binning.FingerprintKeyer;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Keyer that derives a clustering key from the character n-grams of a string.
 *
 * <p>The key is the concatenation of the distinct n-grams of the normalized input,
 * in their natural ({@link String#compareTo}) order.
 */
public class NGramFingerprintKeyer
extends FingerprintKeyer {
    /** Matches one control or whitespace character, using Unicode-aware character classes. */
    static final Pattern ctrlspace = Pattern.compile("[\\p{Cntrl}\\p{Space}]", Pattern.UNICODE_CHARACTER_CLASS);

    /** N-gram size used when the caller does not supply a numeric one. */
    private static final int DEFAULT_NGRAM_SIZE = 2;

    /**
     * Computes the n-gram fingerprint of {@code s}.
     *
     * @param s the string to key; it is first passed through the inherited
     *          {@code normalize(s, true)} and then stripped of every character
     *          matched by {@link #ctrlspace}
     * @param o optional parameters; if the first element is a {@link Number} its
     *          {@link Number#intValue() int value} is used as the n-gram size,
     *          otherwise the size defaults to 2
     * @return the sorted, de-duplicated n-grams joined without a separator
     */
    @Override
    public String key(String s, Object ... o) {
        int ngramSize = DEFAULT_NGRAM_SIZE;
        if (o != null && o.length > 0 && o[0] instanceof Number) {
            // Convert through Number rather than casting to Integer: the guard accepts
            // any Number, so a Long or Double argument must not raise ClassCastException.
            ngramSize = ((Number) o[0]).intValue();
        }
        String normalized = this.normalize(s, true);
        String compacted = ctrlspace.matcher(normalized).replaceAll("");
        return this.sorted_ngrams(compacted, ngramSize).collect(Collectors.joining());
    }

    /**
     * Produces the distinct substrings of length {@code size} of {@code s}, sorted
     * in natural order.
     *
     * @param s    the string to split
     * @param size the n-gram length; when {@code s} is shorter than {@code size}
     *             the index range is empty and so is the returned stream
     * @return a stream of the sorted, distinct n-grams
     */
    protected Stream<String> sorted_ngrams(String s, int size) {
        // The last valid start index is length - size, hence the closed range.
        return IntStream.rangeClosed(0, s.length() - size)
                .mapToObj(i -> s.substring(i, i + size))
                .sorted()
                .distinct();
    }

    /**
     * Collects the substrings of length {@code size} of {@code s} into a sorted set.
     *
     * @param s    the string to split
     * @param size the n-gram length
     * @return the n-grams of {@code s} as a {@link TreeSet}
     * @deprecated use {@link #sorted_ngrams(String, int)} instead
     */
    @Deprecated
    protected TreeSet<String> ngram_split(String s, int size) {
        TreeSet<String> set = new TreeSet<String>();
        int length = s.length();
        for (int i = 0; i + size <= length; ++i) {
            set.add(s.substring(i, i + size));
        }
        return set;
    }
}

