/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.es;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.JLanguageTool;
import org.languagetool.rules.spelling.morfologik.MorfologikSpeller;
import org.languagetool.tokenizers.WordTokenizer;

public class SpanishWordTokenizer
extends WordTokenizer {
    private static final String DICT_FILENAME = "/es/es-ES.dict";
    protected MorfologikSpeller speller;
    private static final Pattern DECIMAL_POINT = Pattern.compile("([\\d])\\.([\\d])", 66);
    private static final Pattern DECIMAL_COMMA = Pattern.compile("([\\d]),([\\d])", 66);

    public SpanishWordTokenizer() {
        if (this.speller == null && JLanguageTool.getDataBroker().resourceExists(DICT_FILENAME)) {
            try {
                this.speller = new MorfologikSpeller(DICT_FILENAME);
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    public List<String> tokenize(String text) {
        ArrayList<String> l = new ArrayList<String>();
        String auxText = text;
        Matcher matcher = DECIMAL_POINT.matcher(auxText);
        auxText = matcher.replaceAll("$1\u0001\u0001CA_DECIMALPOINT\u0001\u0001$2");
        matcher = DECIMAL_COMMA.matcher(auxText);
        auxText = matcher.replaceAll("$1\u0001\u0001CA_DECIMALCOMMA\u0001\u0001$2");
        StringTokenizer st = new StringTokenizer(auxText, " \u00a0\u115f\u1160\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u200e\u200f\u2013\u2014\u2015\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f\u205f\u2060\u2061\u2062\u2063\u206a\u206b\u206c\u206d\u206e\u206f\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb,.;()[]{}<>!?:=*#\u2217\u00d7+\u00f7/\\\"'\u00ab\u00bb\u201e\u201d\u201c\u2018`\u2019\u2026\u00bf\u00a1\t\n\r", true);
        while (st.hasMoreElements()) {
            String s = st.nextToken().replace("\u0001\u0001CA_DECIMALPOINT\u0001\u0001", ".").replace("\u0001\u0001CA_DECIMALCOMMA\u0001\u0001", ",");
            l.addAll(this.wordsToAdd(s));
        }
        return this.joinEMailsAndUrls(l);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private List<String> wordsToAdd(String s) {
        ArrayList<String> l = new ArrayList<String>();
        SpanishWordTokenizer spanishWordTokenizer = this;
        synchronized (spanishWordTokenizer) {
            if (!s.isEmpty()) {
                if (!s.contains("-")) {
                    l.add(s);
                } else if (!this.speller.isMisspelled(s.replace("\u2019", "'"))) {
                    l.add(s);
                } else if (s.equalsIgnoreCase("mers-cov") || s.equalsIgnoreCase("mcgraw-hill") || s.equalsIgnoreCase("sars-cov-2") || s.equalsIgnoreCase("sars-cov") || s.equalsIgnoreCase("ph-metre") || s.equalsIgnoreCase("ph-metres")) {
                    l.add(s);
                } else {
                    StringTokenizer st2 = new StringTokenizer(s, "-", true);
                    while (st2.hasMoreElements()) {
                        l.add(st2.nextToken());
                    }
                }
            }
            return l;
        }
    }
}

