/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.en;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tagging.en.EnglishTagger;
import org.languagetool.tokenizers.WordTokenizer;

/**
 * Word tokenizer for English text.
 *
 * <p>On top of the splitting performed with the tokenizing characters of
 * {@link WordTokenizer} (plus the en dash, U+2013), this class decides how words
 * containing apostrophes or hyphens are cut: contractions such as {@code don't},
 * {@code we'll} or {@code 'twas} are split along the capture groups of a fixed list
 * of patterns, while words the {@link EnglishTagger} knows, and a short list of
 * hard-coded hyphenated terms, are kept in one piece.
 *
 * <p>Both the typewriter apostrophe ({@code '}) and the typographic apostrophe
 * (U+2019) are handled and are preserved unchanged in the output tokens.
 */
public class EnglishWordTokenizer extends WordTokenizer {

    /** Case-insensitive matching, including non-ASCII letters (numeric value 66). */
    private static final int PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Patterns tried in order against every token that contains an apostrophe; the
     * first one that matches wins and its capture groups become the sub-tokens.
     * Compiled once for the class: {@link Pattern} instances are immutable and safe
     * to share.
     */
    private static final List<Pattern> CONTRACTION_PATTERNS = Arrays.asList(
            // Single group spanning the whole token: these forms are never split at the apostrophe.
            Pattern.compile("^(fo['\u2019]c['\u2019]sle|rec['\u2019][ds]|OK['\u2019]d|cc['\u2019][ds]|DJ['\u2019][d]|[pd]m['\u2019]d|rsvp['\u2019]d)$", PATTERN_FLAGS),
            // Negated auxiliaries: optional leading apostrophe + stem + "n't".
            Pattern.compile("^(['\u2019]?)(are|is|were|was|do|does|did|have|has|had|wo|would|ca|could|sha|should|must|ai|ought|might|need|may|am|dare|das|dass|hai|used|use)(n['\u2019]t)$", PATTERN_FLAGS),
            // Word + clitic ('m, 're, 'll, 've, 'd, 's) + optional trailing apostrophe or hyphen.
            Pattern.compile("^(.+)(['\u2019]m|['\u2019]re|['\u2019]ll|['\u2019]ve|['\u2019]d|['\u2019]s)(['\u2019-]?)$", PATTERN_FLAGS),
            // "'twas" is split into "'t" + "was".
            Pattern.compile("^(['\u2019]t)(was)$", PATTERN_FLAGS));

    /**
     * Placeholders that stand in for the two apostrophe characters during the first
     * splitting pass, so that an apostrophe can never act as a token delimiter there.
     */
    private static final String TYPEWRITER_APOSTROPHE_MARK = "\u0001\u0001APOSTYPEW\u0001\u0001";
    private static final String TYPOGRAPHIC_APOSTROPHE_MARK = "\u0001\u0001APOSTYPOG\u0001\u0001";

    /** Hyphenated terms that are kept whole (compared case-insensitively). */
    private static final List<String> UNSPLIT_HYPHENATED_TERMS = Arrays.asList(
            "mers-cov", "mcgraw-hill", "sars-cov-2", "sars-cov", "ph-metre",
            "ph-metres", "anti-ivg", "anti-uv", "anti-vih", "al-qaida");

    private final EnglishTagger tagger = new EnglishTagger();

    /**
     * Returns the tokenizing characters of the superclass with the en dash (U+2013)
     * appended.
     *
     * @return the characters on which text is split
     */
    public String getTokenizingCharacters() {
        return super.getTokenizingCharacters() + "\u2013";
    }

    /**
     * Splits {@code text} into tokens.
     *
     * <p>Apostrophes are masked before the text is split on
     * {@link #getTokenizingCharacters()} (delimiters are returned as tokens too) and
     * restored in each resulting token. A token containing an apostrophe is then
     * matched against the contraction patterns; when one matches, each of its capture
     * groups is refined separately, otherwise the whole token is refined. The
     * collected tokens are finally handed to {@code joinEMailsAndUrls}.
     *
     * @param text the text to tokenize
     * @return the tokens as returned by {@code joinEMailsAndUrls}
     */
    public List<String> tokenize(String text) {
        ArrayList<String> tokens = new ArrayList<>();
        // Plain literal replacement: nothing here needs the regex engine.
        String masked = text
                .replace("'", TYPEWRITER_APOSTROPHE_MARK)
                .replace("\u2019", TYPOGRAPHIC_APOSTROPHE_MARK);
        StringTokenizer splitter = new StringTokenizer(masked, this.getTokenizingCharacters(), true);
        while (splitter.hasMoreElements()) {
            String token = splitter.nextToken()
                    .replace(TYPEWRITER_APOSTROPHE_MARK, "'")
                    .replace(TYPOGRAPHIC_APOSTROPHE_MARK, "\u2019");
            Matcher contraction = findContraction(token);
            if (contraction == null) {
                tokens.addAll(this.wordsToAdd(token));
                continue;
            }
            for (int group = 1; group <= contraction.groupCount(); ++group) {
                // An optional group that matched nothing is the empty string,
                // for which wordsToAdd returns no tokens.
                tokens.addAll(this.wordsToAdd(contraction.group(group)));
            }
        }
        return this.joinEMailsAndUrls(tokens);
    }

    /**
     * Finds the first contraction pattern that matches {@code token}.
     *
     * @param token a single token with its apostrophes restored
     * @return the successful matcher, or {@code null} when the token contains no
     *         apostrophe or no pattern matches
     */
    private static Matcher findContraction(String token) {
        if (!token.contains("'") && !token.contains("\u2019")) {
            return null;
        }
        for (Pattern pattern : CONTRACTION_PATTERNS) {
            Matcher candidate = pattern.matcher(token);
            if (candidate.find()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Refines one token (or one capture group of a contraction) into final tokens.
     *
     * <p>Leading and trailing hyphens are peeled off one at a time as separate
     * {@code "-"} tokens. What remains is kept whole when it contains neither a
     * hyphen nor an apostrophe, when the tagger reports it as tagged, or when it is
     * one of the hard-coded hyphenated terms. Anything else is split at apostrophes,
     * which are emitted as tokens of their own; hyphens are not delimiters in that
     * step, so an unknown hyphenated word without apostrophes stays in one piece.
     *
     * @param s the token to refine; the empty string yields an empty list
     * @return the resulting tokens, in order
     */
    private List<String> wordsToAdd(String s) {
        List<String> parts = new ArrayList<>();
        // Serializes access on this instance, tagger lookup included.
        // NOTE(review): presumably because the tagger is not thread-safe; confirm.
        synchronized (this) {
            if (s.isEmpty()) {
                return parts;
            }
            if (s.startsWith("-")) {
                parts.add("-");
                parts.addAll(this.wordsToAdd(s.substring(1)));
                return parts;
            }
            if (s.endsWith("-")) {
                parts.addAll(this.wordsToAdd(s.substring(0, s.length() - 1)));
                parts.add("-");
                return parts;
            }
            boolean plainWord = !(s.contains("-") || s.contains("'") || s.contains("\u2019"));
            // Short-circuit order matters: the tagger is only consulted for words
            // that actually contain a hyphen or an apostrophe.
            if (plainWord || isTagged(s) || isUnsplitHyphenatedTerm(s)) {
                parts.add(s);
                return parts;
            }
            StringTokenizer apostropheSplitter = new StringTokenizer(s, "\u2019'", true);
            while (apostropheSplitter.hasMoreElements()) {
                parts.add(apostropheSplitter.nextToken());
            }
            return parts;
        }
    }

    /**
     * Asks the tagger whether it knows {@code word}; typographic apostrophes are
     * replaced by typewriter apostrophes for the lookup only.
     */
    private boolean isTagged(String word) {
        return this.tagger.tag(Arrays.asList(word.replace("\u2019", "'"))).get(0).isTagged();
    }

    /** Tells whether {@code word} is one of the hyphenated terms that must stay whole. */
    private static boolean isUnsplitHyphenatedTerm(String word) {
        for (String term : UNSPLIT_HYPHENATED_TERMS) {
            if (word.equalsIgnoreCase(term)) {
                return true;
            }
        }
        return false;
    }
}

