/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.ca;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tagging.ca.CatalanTagger;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Word tokenizer for Catalan text.
 *
 * <p>Tokenization happens in three steps:
 * <ol>
 *   <li>Characters that would normally act as delimiters but belong inside a word or number
 *       (apostrophes and hyphens between letters, decimal separators, a space between digits,
 *       the separator of an "ela geminada") are replaced by placeholder markers.</li>
 *   <li>The text is split on a fixed delimiter set; delimiters are returned as tokens too.</li>
 *   <li>Placeholders are restored and each token is matched against an ordered list of
 *       patterns that separate elided articles/prepositions, weak pronouns and contractions.
 *       Every capture group of the first matching pattern becomes a token of its own.</li>
 * </ol>
 *
 * <p>Note that restoring the placeholders normalizes the text slightly: a typographic
 * apostrophe between letters comes back as a plain {@code '}, and the separator of an
 * "ela geminada" always comes back as {@code .}.
 */
public class CatalanWordTokenizer implements Tokenizer {
    /** Alternation of the pronoun suffixes (apostrophe or hyphen forms) split off from a word. */
    private static final String PF = "('en|'hi|'ho|'l|'ls|'m|'n|'ns|'s|'t|-el|-els|-em|-en|-ens|-hi|-ho|-l|-la|-les|-li|-lo|-los|-m|-me|-n|-ne|-nos|-s|-se|-t|-te|-us|-vos)";

    /** Flags for the case-insensitive patterns (numeric value 66). */
    private static final int IGNORE_CASE = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /** Leading elided form: one of l, n, m, t, s, d followed by an apostrophe. */
    private static final String ELISION = "^([lnmtsd]')";

    /**
     * Split patterns, tried in order; the first one that matches decides how a token is split.
     * More specific patterns (more suffixes) come first.
     */
    private static final Pattern[] PATTERNS = {
        // Elided form followed by a remainder without apostrophe or hyphen.
        Pattern.compile(ELISION + "([^'\\-]*)$", IGNORE_CASE),
        // Fixed expressions kept as a single token by this pattern.
        Pattern.compile("^(qui-sap-lo|qui-sap-la|qui-sap-los|qui-sap-les)$", IGNORE_CASE),
        // Word with three, two or one suffixes, each with and without a leading elided form.
        Pattern.compile(ELISION + "(.{2,})" + PF + PF + PF + "$", IGNORE_CASE),
        Pattern.compile("^(.{2,})" + PF + PF + PF + "$", IGNORE_CASE),
        Pattern.compile(ELISION + "(.{2,})" + PF + PF + "$", IGNORE_CASE),
        Pattern.compile("^(.{2,})" + PF + PF + "$", IGNORE_CASE),
        Pattern.compile(ELISION + "(.{2,})" + PF + "$", IGNORE_CASE),
        // Only UNICODE_CASE is set here (numeric value 64). Without CASE_INSENSITIVE that flag
        // has no effect, so this pattern matches case-sensitively. Kept as in the original.
        Pattern.compile("^(.+[^cbfhjkovwyzCBFHJKOVWYZ])" + PF + "$", Pattern.UNICODE_CASE),
        // Elided form followed by anything else.
        Pattern.compile(ELISION + "(.*)$", IGNORE_CASE),
        // Contractions: al/als, del/dels, pel/pels and can are split in two parts.
        Pattern.compile("^(a|de|pe)(ls?)$", IGNORE_CASE),
        Pattern.compile("^(ca)(n)$", IGNORE_CASE),
    };

    /** Characters the text is split on; each delimiter is also returned as a token. */
    private static final String DELIMITERS =
        " \u00a0\u115f\u1160\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u200e\u200f"
        + "\u2013\u2014\u2015\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f\u205f\u2060\u2061\u2062\u2063"
        + "\u206a\u206b\u206c\u206d\u206e\u206f\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
        + ",.;()[]{}<>!?:/\\\"'\u00ab\u00bb\u201e\u201d\u201c\u2018\u2019`\u00b4\u2026\u00bf\u00a1\t\n\r-";

    /** Lower-case vowels (plain and accented) allowed around an "ela geminada". */
    private static final String VOWEL = "[aeiou\u00e0\u00e9\u00e8\u00ed\u00f3\u00f2\u00fa\u00ef\u00fc]";

    // Protection patterns, compiled once. They are applied in exactly this order.
    private static final Pattern ELA_GEMINADA = Pattern.compile("(" + VOWEL + ")l[.\u2022-]l(" + VOWEL + ")");
    private static final Pattern APOSTROPHE_IN_WORD = Pattern.compile("([\\p{L}])['\u2019]([\\p{L}])");
    private static final Pattern APOSTROPHE_BEFORE_ONE = Pattern.compile("([dlDL])['\u2019](1[\\s\\.,])");
    private static final Pattern DOUBLE_HYPHEN = Pattern.compile("([\\p{L}])-([\\p{L}])-([\\p{L}])");
    private static final Pattern HYPHEN_IN_WORD = Pattern.compile("([\\p{L}])-([\\p{L}\\d])");
    private static final Pattern DECIMAL_POINT = Pattern.compile("([\\d])\\.([\\d])");
    private static final Pattern DECIMAL_COMMA = Pattern.compile("([\\d]),([\\d])");
    private static final Pattern DIGIT_SPACE = Pattern.compile("([\\d]) ([\\d])");

    /** Consulted to decide whether a hyphenated word is kept whole. */
    private final CatalanTagger tagger = new CatalanTagger();

    /** Creates a tokenizer with its own {@code CatalanTagger}. */
    public CatalanWordTokenizer() {
    }

    /**
     * Splits {@code text} into tokens. Delimiters (whitespace, punctuation, ...) are returned
     * as tokens as well.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the tokens in order of appearance
     * @throws RuntimeException if the tagger fails with an {@link IOException} while a
     *         hyphenated word is looked up
     */
    public List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<String>();
        StringTokenizer splitter = new StringTokenizer(protect(text), DELIMITERS, true);
        while (splitter.hasMoreTokens()) {
            String token = restore(splitter.nextToken());
            Matcher split = firstMatch(token);
            if (split == null) {
                tokens.addAll(this.wordsToAdd(token));
                continue;
            }
            // Every capture group of the matching pattern is a separate piece of the token.
            for (int group = 1; group <= split.groupCount(); ++group) {
                tokens.addAll(this.wordsToAdd(split.group(group)));
            }
        }
        return tokens;
    }

    /** Replaces word-internal delimiter characters by placeholders so the splitter keeps them. */
    private static String protect(String text) {
        String masked = ELA_GEMINADA.matcher(text).replaceAll("$1##ELA_GEMINADA##$2");
        masked = APOSTROPHE_IN_WORD.matcher(masked).replaceAll("$1##CA_APOS##$2");
        masked = APOSTROPHE_BEFORE_ONE.matcher(masked).replaceAll("$1##CA_APOS##$2");
        masked = DOUBLE_HYPHEN.matcher(masked).replaceAll("$1##CA_HYPHEN##$2##CA_HYPHEN##$3");
        masked = HYPHEN_IN_WORD.matcher(masked).replaceAll("$1##CA_HYPHEN##$2");
        masked = DECIMAL_POINT.matcher(masked).replaceAll("$1##CA_DECIMALPOINT##$2");
        masked = DECIMAL_COMMA.matcher(masked).replaceAll("$1##CA_DECIMALCOMMA##$2");
        return DIGIT_SPACE.matcher(masked).replaceAll("$1##CA_SPACE##$2");
    }

    /** Turns the placeholders inserted by {@link #protect(String)} back into plain characters. */
    private static String restore(String token) {
        // The placeholders are literal text, so plain replace() is enough (no regex needed).
        return token
            .replace("##CA_APOS##", "'")
            .replace("##CA_HYPHEN##", "-")
            .replace("##CA_DECIMALPOINT##", ".")
            .replace("##CA_DECIMALCOMMA##", ",")
            .replace("##CA_SPACE##", " ")
            .replace("##ELA_GEMINADA##", "l.l");
    }

    /** Returns the matcher of the first split pattern found in {@code token}, or null if none. */
    private static Matcher firstMatch(String token) {
        for (Pattern pattern : PATTERNS) {
            Matcher candidate = pattern.matcher(token);
            if (candidate.find()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the token(s) to emit for {@code s}. A string without hyphen is returned as is.
     * A hyphenated string is kept whole when the tagger reports that the word exists;
     * otherwise it is split at every hyphen, with the hyphens emitted as tokens too.
     *
     * @throws RuntimeException if the tagger lookup fails; previously the failure was only
     *         printed and the token was silently dropped from the result
     */
    private List<String> wordsToAdd(String s) {
        List<String> parts = new ArrayList<String>();
        if (!s.contains("-")) {
            parts.add(s);
            return parts;
        }
        boolean known;
        try {
            known = this.tagger.existsWord(s);
        }
        catch (IOException e) {
            throw new RuntimeException("Could not look up \"" + s + "\" with the Catalan tagger", e);
        }
        if (known) {
            parts.add(s);
            return parts;
        }
        StringTokenizer pieces = new StringTokenizer(s, "-", true);
        while (pieces.hasMoreTokens()) {
            parts.add(pieces.nextToken());
        }
        return parts;
    }
}

