/*
 * Decompiled with CFR 0.152.
 */
package com.knuddels.jtokkit;

import com.knuddels.jtokkit.ByteArrayList;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.function.Predicate;

/**
 * Hand-written scanner that cuts a string into fragments and hands each fragment, encoded as
 * UTF-8 bytes, to a caller-supplied consumer.
 *
 * <p>The fragment rules implemented by {@link #split} are, in priority order: contractions
 * ({@code 's}, {@code 'll}, ...), letter runs with one optional leading non-newline symbol,
 * numeric runs of at most three code points, symbol runs with an optional leading space and
 * trailing newlines, and finally whitespace runs.
 * NOTE(review): these rules look like a manual expansion of the cl100k_base pre-tokenisation
 * regex - confirm against the encoding definition before changing any of them.
 */
class Cl100kParser {
    /** Code points that form a two-character contraction when they follow an apostrophe. */
    private static final String SDTM = "sdtmSDTM\u017f";
    /** Whitespace code points in the range U+0009..U+000D. */
    private static final String SIMPLE_WHITESPACES = "\t\n\u000b\f\r";
    /** Sorted whitespace code points in U+1680..U+3000, looked up by binary search. */
    private static final int[] REMAINING_WHITESPACES = "\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000".codePoints().sorted().toArray();

    Cl100kParser() {
    }

    /**
     * Splits {@code input2} into fragments and passes each one to {@code fragmentConsumer}.
     *
     * <p>A single {@link ByteArrayList} is reused for every fragment: it is cleared and refilled
     * before each call, so the consumer must copy the bytes if it needs them afterwards.
     *
     * @param input2 the text to split; with assertions enabled it must be encodable as UTF-8
     * @param fragmentConsumer receives the UTF-8 bytes of each fragment in input order; returning
     *     {@code true} stops the scan
     */
    static void split(String input2, Predicate<ByteArrayList> fragmentConsumer) {
        assert (isValidUTF8(input2)) : "Input is not UTF-8: " + input2;
        ByteArrayList utf8Bytes = new ByteArrayList();
        boolean finished = false;
        int endIndex = 0;
        while (endIndex < input2.length() && !finished) {
            int startIndex = endIndex;
            int c0 = input2.codePointAt(startIndex);
            int cc0 = Character.charCount(c0);
            int nextIndex = startIndex + cc0;
            // -1 marks "no second code point"; every classifier below rejects negative values.
            int c1 = nextIndex < input2.length() ? input2.codePointAt(nextIndex) : -1;

            if (c0 == '\'' && c1 > 0) {
                if (isShortContraction(c1)) {
                    // Apostrophe plus one of the SDTM characters, e.g. "'s" or "'t".
                    endIndex += 2;
                    finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                    continue;
                }
                if (startIndex + 2 < input2.length() && isLongContraction(c1, input2.codePointAt(startIndex + 2))) {
                    // Apostrophe plus "ll", "ve" or "re" in either case.
                    endIndex += 3;
                    finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                    continue;
                }
            }

            // Character.charCount(-1) is 1; cc1 is only added after c1 passed a classifier.
            int cc1 = Character.charCount(c1);
            if (isNotNewlineOrLetterOrNumeric(c0) && isLetter(c1) || isLetter(c0)) {
                // Letter run, optionally preceded by a single non-newline, non-alphanumeric code point.
                endIndex += cc0;
                if (isLetter(c1)) {
                    endIndex += cc1;
                    while (endIndex < input2.length() && isLetter(c0 = input2.codePointAt(endIndex))) {
                        endIndex += Character.charCount(c0);
                    }
                }
                finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                continue;
            }
            if (isNumeric(c0)) {
                // Numeric run of one to three code points.
                endIndex += cc0;
                if (isNumeric(c1)) {
                    endIndex += cc1;
                    if (endIndex < input2.length() && isNumeric(c0 = input2.codePointAt(endIndex))) {
                        endIndex += Character.charCount(c0);
                    }
                }
                finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                continue;
            }
            if (isNotWhitespaceOrLetterOrNumeric(c0) || c0 == ' ' && isNotWhitespaceOrLetterOrNumeric(c1)) {
                // Symbol run, optionally preceded by one space and followed by any number of newlines.
                endIndex += cc0;
                if (endIndex < input2.length() && isNotWhitespaceOrLetterOrNumeric(c1)) {
                    endIndex += cc1;
                    while (endIndex < input2.length() && isNotWhitespaceOrLetterOrNumeric(c0 = input2.codePointAt(endIndex))) {
                        endIndex += Character.charCount(c0);
                    }
                }
                while (endIndex < input2.length() && isNewline(input2.codePointAt(endIndex))) {
                    ++endIndex;
                }
                finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                continue;
            }

            // Only whitespace can reach this point. Scan the whole whitespace run and remember
            // where its last newline sits.
            assert (isWhitespace(c0)) : "Invalid character: " + Arrays.toString(Character.toChars(c0));
            int lastNewLineIndex = isNewline(c0) ? endIndex : -1;
            endIndex += cc0;
            if (isWhitespace(c1)) {
                lastNewLineIndex = isNewline(c1) ? endIndex : lastNewLineIndex;
                endIndex += cc1;
                while (endIndex < input2.length() && isWhitespace(c0 = input2.codePointAt(endIndex))) {
                    lastNewLineIndex = isNewline(c0) ? endIndex : lastNewLineIndex;
                    endIndex += Character.charCount(c0);
                }
            }

            if (lastNewLineIndex > -1) {
                // Save the scanned end BEFORE rewinding endIndex. Doing both inside a single
                // comparison would read the rewound value and make the test always false.
                int finalEndIndex = endIndex;
                endIndex = lastNewLineIndex + 1;
                if (endIndex < finalEndIndex) {
                    // Whitespace follows the last newline: emit everything up to and including
                    // that newline first, then continue with the remainder below.
                    assert (startIndex < endIndex);
                    finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                    startIndex = endIndex;
                    endIndex = finalEndIndex;
                }
            }
            if (!finished) {
                // c0 is non-whitespace only when the scan loop stopped on a visible code point;
                // in that case the last whitespace char is left for the next fragment.
                if (lastNewLineIndex + 1 < endIndex && !isWhitespace(c0)) {
                    --endIndex;
                }
                if (startIndex < endIndex) {
                    finished = fragmentConsumer.test(addUtf8Bytes(input2, startIndex, endIndex, utf8Bytes));
                }
            }
        }
    }

    /**
     * Tells whether {@code ch} is one of the characters in {@code "sdtmSDTM\u017f"}.
     *
     * @param ch the code point following an apostrophe
     * @return {@code true} if the apostrophe and {@code ch} form a short contraction
     */
    static boolean isShortContraction(int ch) {
        return SDTM.indexOf(ch) >= 0;
    }

    /**
     * Tells whether the pair is {@code ll}, {@code ve} or {@code re}, ignoring case.
     *
     * @param ch1 the first code point after the apostrophe
     * @param ch2 the second code point after the apostrophe
     * @return {@code true} if the pair forms a long contraction
     */
    static boolean isLongContraction(int ch1, int ch2) {
        // Check the lower-case spellings first so they skip the case conversion.
        if (ch1 == 'l' && ch2 == 'l' || ch1 == 'v' && ch2 == 'e' || ch1 == 'r' && ch2 == 'e') {
            return true;
        }
        int upper1 = Character.toUpperCase(ch1);
        int upper2 = Character.toUpperCase(ch2);
        return upper1 == 'L' && upper2 == 'L' || upper1 == 'V' && upper2 == 'E' || upper1 == 'R' && upper2 == 'E';
    }

    /**
     * Tells whether a fresh UTF-8 encoder reports {@code input2} as encodable.
     *
     * @param input2 the string to check
     * @return the result of {@code CharsetEncoder.canEncode} for UTF-8
     */
    static boolean isValidUTF8(String input2) {
        return StandardCharsets.UTF_8.newEncoder().canEncode(input2);
    }

    /**
     * Tells whether {@code ch} counts as a letter.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code true} for ASCII letters, and for code points in U+00AA..U+323AF whose
     *     general category is one of the five letter categories
     */
    static boolean isLetter(int ch) {
        if (ch < 0xAA) {
            // Below U+00AA only ASCII letters are accepted; this also rejects negatives.
            return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z';
        }
        if (ch <= 0x323AF) {
            switch (Character.getType(ch)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.OTHER_LETTER:
                    return true;
                default:
                    return false;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code ch} counts as numeric.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code true} for ASCII digits, and for code points in U+00B2..U+1FBF9 whose
     *     general category is one of the three number categories
     */
    static boolean isNumeric(int ch) {
        if (ch < 0xB2) {
            // Below U+00B2 only ASCII digits are accepted; this also rejects negatives.
            return ch >= '0' && ch <= '9';
        }
        if (ch <= 0x1FBF9) {
            switch (Character.getType(ch)) {
                case Character.DECIMAL_DIGIT_NUMBER:
                case Character.LETTER_NUMBER:
                case Character.OTHER_NUMBER:
                    return true;
                default:
                    return false;
            }
        }
        return false;
    }

    /**
     * Combined letter/number test that needs a single category lookup.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code true} for ASCII letters and digits, and for code points in U+00AA..U+323AF
     *     whose general category is a letter or number category
     */
    static boolean isLetterOrNumeric(int ch) {
        if (ch < 0xAA) {
            return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9';
        }
        if (ch <= 0x323AF) {
            switch (Character.getType(ch)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.OTHER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                case Character.LETTER_NUMBER:
                case Character.OTHER_NUMBER:
                    return true;
                default:
                    return false;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code ch} is in this parser's whitespace set.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code true} for U+0009..U+000D, space, U+0085, U+00A0 and the code points listed
     *     in {@code REMAINING_WHITESPACES}
     */
    static boolean isWhitespace(int ch) {
        if (ch <= '\r') {
            return SIMPLE_WHITESPACES.indexOf(ch) >= 0;
        }
        if (ch < 0x85) {
            return ch == ' ';
        }
        // The range check avoids the binary search for most non-whitespace code points.
        return ch == 0x85 || ch == 0xA0 || ch >= 0x1680 && ch <= 0x3000 && Arrays.binarySearch(REMAINING_WHITESPACES, ch) >= 0;
    }

    /**
     * Tells whether {@code ch} is a carriage return or a line feed.
     *
     * @param ch a code point
     * @return {@code true} for {@code '\r'} and {@code '\n'}
     */
    static boolean isNewline(int ch) {
        return ch == '\r' || ch == '\n';
    }

    /**
     * Tells whether {@code ch} is a real code point that is neither whitespace, a letter nor
     * numeric.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code false} for negative values, whitespace, letters and numerics
     */
    static boolean isNotWhitespaceOrLetterOrNumeric(int ch) {
        if (ch < '0') {
            // Below '0' there are no letters or digits; only space and U+0009..U+000D are excluded.
            return ch >= 0 && ch != ' ' && (ch > '\r' || ch < '\t');
        }
        return !isLetterOrNumeric(ch) && !isWhitespace(ch);
    }

    /**
     * Tells whether {@code ch} is a real code point that is neither a newline, a letter nor
     * numeric.
     *
     * @param ch a code point, or a negative value for "none"
     * @return {@code false} for negative values, {@code '\r'}, {@code '\n'}, letters and numerics
     */
    static boolean isNotNewlineOrLetterOrNumeric(int ch) {
        if (ch < '0') {
            // Below '0' there are no letters or digits, so only the two newlines are excluded.
            return ch >= 0 && !isNewline(ch);
        }
        return !isLetterOrNumeric(ch);
    }

    /**
     * Replaces the contents of {@code dst} with the UTF-8 encoding of a slice of {@code input2}.
     *
     * @param input2 the source string
     * @param start index of the first char to encode (inclusive)
     * @param end index after the last char to encode (exclusive)
     * @param dst the list to clear and fill
     * @return {@code dst}
     */
    static ByteArrayList addUtf8Bytes(String input2, int start, int end, ByteArrayList dst) {
        dst.clear();
        for (int i = start; i < end; ++i) {
            int cp = input2.codePointAt(i);
            if (cp < 0x80) {
                dst.add((byte)cp);
            } else if (cp < 0x800) {
                dst.add((byte)(0xC0 | cp >> 6));
                dst.add((byte)(0x80 | cp & 0x3F));
            } else if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
                dst.add((byte)(0xE0 | cp >> 12));
                dst.add((byte)(0x80 | cp >> 6 & 0x3F));
                dst.add((byte)(0x80 | cp & 0x3F));
            } else {
                assert (cp < 0x110000) : "Invalid code point: " + cp;
                dst.add((byte)(0xF0 | cp >> 18));
                dst.add((byte)(0x80 | cp >> 12 & 0x3F));
                dst.add((byte)(0x80 | cp >> 6 & 0x3F));
                dst.add((byte)(0x80 | cp & 0x3F));
                // A supplementary code point occupies two chars; skip its low surrogate.
                ++i;
            }
        }
        return dst;
    }
}

