/*
 * Decompiled with CFR 0.152.
 */
package com.yahoo.language.simple;

import com.yahoo.language.Language;
import com.yahoo.language.detect.Detection;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.detect.Hint;
import com.yahoo.text.Utf8;
import java.nio.ByteBuffer;

/**
 * Heuristic {@link Detector} that guesses a language from the Unicode blocks
 * occurring in a text and guesses a byte encoding from the octet structure of
 * the input.
 *
 * <p>Only scripts that identify a language more or less on their own are
 * recognised (Hangul, kana, CJK ideographs, Bopomofo and Thai); every other
 * input yields {@link Language#UNKNOWN}.
 */
public class SimpleDetector implements Detector {

    private static final String ISO_8859_1 = "ISO-8859-1";
    private static final String US_ASCII = "US-ASCII";

    /**
     * Detects language and encoding of the slice {@code input[offset, offset + length)}.
     *
     * @param input  the raw bytes to inspect
     * @param offset index of the first byte to inspect
     * @param length number of bytes to inspect
     * @param hint   not used by this implementation
     * @return a detection holding the guessed language and encoding name; the
     *         third constructor argument is always passed as {@code false}
     */
    @Override
    public Detection detect(byte[] input, int offset, int length, Hint hint) {
        // Both guesses must look at the same slice; bytes outside of it must
        // not influence the reported encoding.
        return new Detection(guessLanguage(input, offset, length), guessEncoding(input, offset, length), false);
    }

    /**
     * Detects language and encoding of the remaining bytes of {@code input}.
     * The bytes are read from the buffer, so its position is advanced to its limit.
     *
     * @param input the buffer to read from
     * @param hint  not used by this implementation
     * @return the detection for the bytes that were remaining in the buffer
     */
    @Override
    public Detection detect(ByteBuffer input, Hint hint) {
        byte[] buf = new byte[input.remaining()];
        input.get(buf, 0, buf.length);
        return detect(buf, 0, buf.length, hint);
    }

    /**
     * Detects the language of a string. The encoding is always reported as the
     * name of the charset returned by {@code Utf8.getCharset()}.
     *
     * @param input the text to inspect; {@code null} or empty yields {@link Language#UNKNOWN}
     * @param hint  not used by this implementation
     * @return the detection for the given text
     */
    @Override
    public Detection detect(String input, Hint hint) {
        return new Detection(guessLanguage(input), Utf8.getCharset().name(), false);
    }

    /**
     * Decodes the given slice with {@code Utf8.toString} and guesses the
     * language of the resulting text.
     *
     * @param buf    the raw bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @return the guessed language, or {@link Language#UNKNOWN}
     */
    public static Language guessLanguage(byte[] buf, int offset, int length) {
        return guessLanguage(Utf8.toString(buf, offset, length));
    }

    /**
     * Guesses the language of a text from the scripts it contains.
     *
     * <p>The text is scanned left to right and the first character that
     * identifies Korean, Japanese, Bopomofo (reported as traditional Chinese)
     * or Thai decides the result immediately. CJK ideographs and related
     * symbols alone only produce a tentative {@link Language#CHINESE_TRADITIONAL},
     * which a later, more specific character can still override.
     *
     * @param input the text to inspect, may be {@code null}
     * @return the guessed language, or {@link Language#UNKNOWN} when the input
     *         is {@code null}, empty or contains none of the recognised scripts
     */
    public static Language guessLanguage(String input) {
        if (input == null || input.isEmpty()) {
            return Language.UNKNOWN;
        }
        Language soFar = Language.UNKNOWN;
        // Iterate by code point, not by char: some of the CJK blocks tested
        // below lie outside the BMP and would otherwise only be seen as
        // surrogate halves, which never match any of these blocks.
        for (int i = 0; i < input.length(); ) {
            int codePoint = input.codePointAt(i);
            i += Character.charCount(codePoint);
            Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);

            if (isKorean(codePoint, block)) {
                return Language.KOREAN;
            }
            if (isJapanese(codePoint, block)) {
                return Language.JAPANESE;
            }
            if (isCjk(block)) {
                // Ideographs are shared between languages; keep scanning in
                // case a more specific script shows up later.
                soFar = Language.CHINESE_TRADITIONAL;
            }
            if (block == Character.UnicodeBlock.BOPOMOFO || block == Character.UnicodeBlock.BOPOMOFO_EXTENDED) {
                return Language.CHINESE_TRADITIONAL;
            }
            if (block == Character.UnicodeBlock.THAI) {
                return Language.THAI;
            }
        }
        return soFar;
    }

    /** Returns whether the code point is Hangul, including a few explicit Hangul-related ranges outside the Hangul blocks. */
    private static boolean isKorean(int codePoint, Character.UnicodeBlock block) {
        return (codePoint >= 0x3200 && codePoint < 0x3220)
                || (codePoint >= 0x3260 && codePoint < 0x3280)
                || (codePoint >= 0xffa0 && codePoint < 0xffe0)
                || codePoint == 0x302e
                || codePoint == 0x302f
                || block == Character.UnicodeBlock.HANGUL_SYLLABLES
                || block == Character.UnicodeBlock.HANGUL_JAMO
                || block == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
    }

    /** Returns whether the code point is kana or kanbun. */
    private static boolean isJapanese(int codePoint, Character.UnicodeBlock block) {
        // U+31F0..U+31FF is tested by value because it is not part of the
        // KATAKANA block that is checked below.
        return (0x31f0 <= codePoint && codePoint <= 0x31ff)
                || block == Character.UnicodeBlock.HIRAGANA
                || block == Character.UnicodeBlock.KATAKANA
                || block == Character.UnicodeBlock.KANBUN;
    }

    /** Returns whether the block is one of the CJK ideograph, radical, compatibility or punctuation blocks. */
    private static boolean isCjk(Character.UnicodeBlock block) {
        return block == Character.UnicodeBlock.CJK_COMPATIBILITY
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
                || block == Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    }

    /** Returns whether the two top bits of the octet are {@code 10}, i.e. it has the form of a continuation octet. */
    private static boolean isTrailingOctet(byte i) {
        return (i & 0xC0) == 0x80;
    }

    /**
     * Classifies an octet as the start of a multi-octet sequence.
     *
     * @param c the octet to classify
     * @return the number of continuation octets that must follow ({@code 0}
     *         for a 7-bit octet, {@code 1} to {@code 5} for a lead octet), or
     *         {@code -1} if the octet cannot start a sequence
     */
    private static int isLeadingFor(byte c) {
        int i = c & 0xFF;
        if ((i & 0x80) == 0) {
            return 0;
        }
        if (i >>> 5 == 6) {     // 110xxxxx
            return 1;
        }
        if (i >>> 4 == 14) {    // 1110xxxx
            return 2;
        }
        if (i >>> 3 == 30) {    // 11110xxx
            return 3;
        }
        if (i >>> 2 == 62) {    // 111110xx
            return 4;
        }
        if (i >>> 1 == 126) {   // 1111110x
            return 5;
        }
        return -1;
    }

    /**
     * Guesses the encoding of {@code input[offset, offset + length)}.
     *
     * @param input  the raw bytes
     * @param offset index of the first byte to inspect
     * @param length number of bytes to inspect
     * @return {@code "US-ASCII"} if every octet is 7-bit, the name of the
     *         charset returned by {@code Utf8.getCharset()} if all high octets
     *         form complete lead/continuation sequences, and
     *         {@code "ISO-8859-1"} otherwise
     */
    private static String guessEncoding(byte[] input, int offset, int length) {
        int end = offset + length;
        boolean hasHighs = false;
        int i = offset;
        while (i < end) {
            int trailing = isLeadingFor(input[i]);
            // Stray continuation/invalid octet, or a sequence cut off by the end of the slice.
            if (trailing < 0 || i + trailing >= end) {
                return ISO_8859_1;
            }
            if (trailing > 0) {
                hasHighs = true;
                for (int k = 1; k <= trailing; k++) {
                    if (!isTrailingOctet(input[i + k])) {
                        return ISO_8859_1;
                    }
                }
            }
            i += trailing + 1;
        }
        return hasHighs ? Utf8.getCharset().name() : US_ASCII;
    }
}

