/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.br;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.languagetool.tokenizers.WordTokenizer;

/**
 * Word tokenizer for Breton text.
 *
 * <p>Wraps the inherited {@link WordTokenizer#tokenize} call with a pre- and post-processing
 * step for apostrophes that directly follow a letter: each such apostrophe is swapped for a
 * placeholder before delegating and swapped back afterwards. The apparent intent is to keep
 * the trigraph {@code c'h} inside its word and to leave an elided form such as {@code n'eo}
 * as the two tokens {@code n'} and {@code eo}; this relies on the parent tokenizer not
 * splitting at the placeholder, which is not visible from this class.
 *
 * <p>Every restored apostrophe is written as the right single quotation mark (U+2019),
 * whichever of the four accepted apostrophe variants the input used.
 */
public class BretonWordTokenizer
extends WordTokenizer {
    /** Stand-in text substituted for an apostrophe while the parent tokenizer runs. */
    private static final String APOS_PLACEHOLDER = "\u0001\u0001BR@APOS\u0001\u0001";

    /** Apostrophe character written back into the resulting tokens. */
    private static final String APOSTROPHE = "\u2019";

    /** An apostrophe variant between {@code c} and {@code h} (either case). */
    private static final Pattern TRIGRAPH_APOSTROPHE = Pattern.compile("([Cc])['\u2019\u2018\u02bc]([Hh])");

    /** An apostrophe variant directly after any letter. */
    private static final Pattern LETTER_APOSTROPHE = Pattern.compile("(\\p{L})['\u2019\u2018\u02bc]");

    /** The placeholder itself, matched literally. */
    private static final Pattern PLACEHOLDER = Pattern.compile(APOS_PLACEHOLDER, Pattern.LITERAL);

    /**
     * Tokenizes {@code text} through the parent tokenizer with letter-adjacent apostrophes
     * protected by a placeholder.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the tokens produced by the parent tokenizer, with placeholders turned back into
     *         apostrophes and the token following each apostrophe-terminated token dropped
     */
    public List<String> tokenize(String text) {
        // Inside "c'h" the placeholder is inserted without any separator.
        String replaced = TRIGRAPH_APOSTROPHE.matcher(text).replaceAll("$1" + APOS_PLACEHOLDER + "$2");
        // Any other letter + apostrophe gets the placeholder followed by an artificial space.
        replaced = LETTER_APOSTROPHE.matcher(replaced).replaceAll("$1" + APOS_PLACEHOLDER + " ");

        List<String> tokenList = super.tokenize(replaced);
        List<String> tokens = new ArrayList<>(tokenList.size());
        Iterator<String> itr = tokenList.iterator();
        while (itr.hasNext()) {
            String word = PLACEHOLDER.matcher(itr.next()).replaceAll(APOSTROPHE);
            tokens.add(word);
            // A token that ends with (but is not only) an apostrophe is expected to be followed
            // by the artificial space inserted above; drop that token. The hasNext() guard
            // avoids a NoSuchElementException when no token follows.
            if (!word.equals(APOSTROPHE) && word.endsWith(APOSTROPHE) && itr.hasNext()) {
                itr.next();
            }
        }
        return tokens;
    }
}

