/*
 * Decompiled with CFR 0.152.
 */
package com.yahoo.language.simple;

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.yahoo.language.Language;
import com.yahoo.language.detect.Detection;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.detect.Hint;
import com.yahoo.language.simple.SimpleLinguisticsConfig;
import com.yahoo.text.Utf8;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Locale;

/**
 * Language and encoding detector built from two stages.
 *
 * <p>Language detection first scans the text for characters belonging to
 * script-specific Unicode blocks (Hangul, Kana, CJK ideographs, Bopomofo,
 * Thai). If that scan finds nothing and Optimaize detection is enabled, the
 * text is handed to the Optimaize {@link LanguageDetector}.
 *
 * <p>Encoding detection inspects raw bytes and reports one of
 * {@code US-ASCII}, the name of {@link Utf8#getCharset()}, or
 * {@code ISO-8859-1}.
 */
public class SimpleDetector implements Detector {

    private static final TextObjectFactory textObjectFactory;
    private static final LanguageDetector languageDetector;

    static {
        // The built-in profiles are read once per class load; failure to read
        // them is unrecoverable for this class, hence the unchecked rethrow.
        try {
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                    .build();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    }

    /** Whether to fall back to the Optimaize detector when the script scan is inconclusive. */
    private final boolean enableOptimaize;

    /** Creates a detector with the Optimaize fallback enabled. */
    public SimpleDetector() {
        this.enableOptimaize = true;
    }

    /**
     * Creates a detector configured from the given detector config.
     *
     * @param detector configuration supplying the {@code enableOptimaize} flag
     */
    public SimpleDetector(SimpleLinguisticsConfig.Detector detector) {
        this.enableOptimaize = detector.enableOptimaize();
    }

    /**
     * Detects language and encoding of {@code length} bytes of {@code input}
     * starting at {@code offset}. Both guesses are made over that same range.
     *
     * @param input  the bytes to inspect
     * @param offset index of the first byte to inspect
     * @param length number of bytes to inspect
     * @param hint   not used by this implementation
     * @return the detection result
     */
    @Override
    public Detection detect(byte[] input, int offset, int length, Hint hint) {
        return new Detection(guessLanguage(input, offset, length), guessEncoding(input, offset, length), false);
    }

    /**
     * Detects language and encoding of the remaining bytes of {@code input}.
     * The bytes are copied out with a relative bulk get, so the buffer's
     * position is advanced to its limit.
     *
     * @param input the buffer whose remaining bytes are inspected
     * @param hint  not used by this implementation
     * @return the detection result
     */
    @Override
    public Detection detect(ByteBuffer input, Hint hint) {
        byte[] remaining = new byte[input.remaining()];
        input.get(remaining, 0, remaining.length);
        return detect(remaining, 0, remaining.length, hint);
    }

    /**
     * Detects the language of {@code input}; the encoding is reported as the
     * name of {@link Utf8#getCharset()} without inspecting the text.
     *
     * @param input the text to inspect; {@code null} or empty yields an unknown language
     * @param hint  not used by this implementation
     * @return the detection result
     */
    @Override
    public Detection detect(String input, Hint hint) {
        return new Detection(guessLanguage(input), Utf8.getCharset().name(), false);
    }

    /**
     * Decodes the given byte range with {@link Utf8#toString(byte[], int, int)}
     * and guesses the language of the resulting text.
     *
     * @param buf    the bytes to decode
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @return the guessed language, or {@link Language#UNKNOWN}
     */
    public Language guessLanguage(byte[] buf, int offset, int length) {
        return guessLanguage(Utf8.toString(buf, offset, length));
    }

    /**
     * Guesses the language of {@code input}.
     *
     * <p>The text is scanned code point by code point (not {@code char} by
     * {@code char}), so that characters outside the Basic Multilingual Plane,
     * such as CJK Unified Ideographs Extension B, are classified by their real
     * Unicode block instead of being seen as surrogate halves.
     *
     * <p>Korean, Japanese, Bopomofo and Thai characters decide the result
     * immediately. CJK ideographs only record a tentative
     * {@link Language#CHINESE_TRADITIONAL}, because a later Kana or Hangul
     * character in the same text overrides it.
     *
     * @param input the text to inspect; may be {@code null}
     * @return the guessed language, or {@link Language#UNKNOWN}
     */
    public Language guessLanguage(String input) {
        if (input == null || input.length() == 0) {
            return Language.UNKNOWN;
        }
        Language soFar = Language.UNKNOWN;
        int index = 0;
        while (index < input.length()) {
            int codePoint = input.codePointAt(index);
            index += Character.charCount(codePoint);
            // of(int) returns null for code points outside any known block;
            // the identity comparisons below simply fail in that case.
            Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);

            if (isKorean(codePoint, block)) {
                return Language.KOREAN;
            }
            if (isJapanese(codePoint, block)) {
                return Language.JAPANESE;
            }
            if (isCjk(block)) {
                soFar = Language.CHINESE_TRADITIONAL;
            }
            if (block == Character.UnicodeBlock.BOPOMOFO || block == Character.UnicodeBlock.BOPOMOFO_EXTENDED) {
                return Language.CHINESE_TRADITIONAL;
            }
            if (block == Character.UnicodeBlock.THAI) {
                return Language.THAI;
            }
        }
        if (enableOptimaize && Language.UNKNOWN.equals(soFar)) {
            return detectLangOptimaize(input);
        }
        return soFar;
    }

    /** Returns whether the code point is in one of the Hangul-specific ranges or blocks. */
    private static boolean isKorean(int codePoint, Character.UnicodeBlock block) {
        return (codePoint >= 0x3200 && codePoint < 0x3220)
                || (codePoint >= 0x3260 && codePoint < 0x3280)
                || (codePoint >= 0xFFA0 && codePoint < 0xFFE0)
                || codePoint == 0x302E
                || codePoint == 0x302F
                || block == Character.UnicodeBlock.HANGUL_SYLLABLES
                || block == Character.UnicodeBlock.HANGUL_JAMO
                || block == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
    }

    /** Returns whether the code point is in U+31F0..U+31FF or in the Hiragana, Katakana or Kanbun block. */
    private static boolean isJapanese(int codePoint, Character.UnicodeBlock block) {
        return (codePoint >= 0x31F0 && codePoint <= 0x31FF)
                || block == Character.UnicodeBlock.HIRAGANA
                || block == Character.UnicodeBlock.KATAKANA
                || block == Character.UnicodeBlock.KANBUN;
    }

    /** Returns whether the block is one of the CJK blocks shared between Chinese, Japanese and Korean. */
    private static boolean isCjk(Character.UnicodeBlock block) {
        return block == Character.UnicodeBlock.CJK_COMPATIBILITY
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
                || block == Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    }

    /**
     * Runs the Optimaize detector on {@code input}.
     *
     * @return the language mapped from the detected locale's language code, or
     *         {@link Language#UNKNOWN} when the input is empty or the detector
     *         returns no result
     */
    private static Language detectLangOptimaize(String input) {
        if (input == null || input.length() == 0) {
            return Language.UNKNOWN;
        }
        TextObject textObject = textObjectFactory.forText(input);
        Optional<LdLocale> lang = languageDetector.detect(textObject);
        if (lang.isPresent()) {
            String language = lang.get().getLanguage();
            return Language.fromLocale(new Locale(language));
        }
        return Language.UNKNOWN;
    }

    /** Returns whether {@code b} has the bit pattern {@code 10xxxxxx} of a UTF-8 continuation byte. */
    private static boolean isTrailingOctet(byte b) {
        return (b & 0xC0) == 0x80;
    }

    /**
     * Classifies {@code c} as the first byte of a UTF-8 style sequence.
     *
     * @return the number of continuation bytes expected after {@code c}
     *         (0 for a 7-bit byte, 1..5 for a lead byte), or -1 if {@code c}
     *         is not a valid first byte (for example a continuation byte)
     */
    private static int isLeadingFor(byte c) {
        int i = c & 0xFF;
        if ((i & 0x80) == 0) {
            return 0;
        }
        if (i >>> 5 == 0b110) {
            return 1;
        }
        if (i >>> 4 == 0b1110) {
            return 2;
        }
        if (i >>> 3 == 0b11110) {
            return 3;
        }
        if (i >>> 2 == 0b111110) {
            return 4;
        }
        if (i >>> 1 == 0b1111110) {
            return 5;
        }
        return -1;
    }

    /**
     * Guesses the encoding of all bytes in {@code input}.
     *
     * @param input the bytes to inspect
     * @return {@code "US-ASCII"} if every byte is 7-bit, the name of
     *         {@link Utf8#getCharset()} if there are high bytes and all of them
     *         form well-shaped multi-byte sequences, otherwise {@code "ISO-8859-1"}
     */
    public String guessEncoding(byte[] input) {
        return guessEncoding(input, 0, input.length);
    }

    /**
     * Guesses the encoding of {@code length} bytes of {@code input} starting
     * at {@code offset}; see {@link #guessEncoding(byte[])} for the result values.
     */
    private static String guessEncoding(byte[] input, int offset, int length) {
        int end = offset + length;
        boolean hasHighs = false;
        for (int i = offset; i < end; ++i) {
            int trailing = isLeadingFor(input[i]);
            // Invalid lead byte, or a sequence that would run past the end of the range.
            if (trailing < 0 || i + trailing >= end) {
                return "ISO-8859-1";
            }
            if (trailing == 0) {
                continue;
            }
            hasHighs = true;
            for (int k = 0; k < trailing; ++k) {
                if (!isTrailingOctet(input[++i])) {
                    return "ISO-8859-1";
                }
            }
        }
        return hasHighs ? Utf8.getCharset().name() : "US-ASCII";
    }
}

