/*
 * Decompiled with CFR 0.152.
 */
package com.google.appengine.api.search.dev;

import com.google.appengine.api.search.dev.LuceneUtils;
import com.google.apphosting.api.AppEngineInternal;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

@AppEngineInternal
public class WordSeparatorAnalyzer
extends Analyzer {
    static final Logger LOG = Logger.getLogger(WordSeparatorAnalyzer.class.getCanonicalName());
    private final boolean detectCjk;
    private static final Pattern DIACRITICIAL_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    public WordSeparatorAnalyzer(boolean detectCjk) {
        this.detectCjk = detectCjk;
    }

    public WordSeparatorAnalyzer() {
        this(true);
    }

    public TokenStream tokenStream(String fieldName, Reader reader) {
        StringBuilder readerContents = new StringBuilder();
        if (this.detectCjk) {
            boolean isCjk;
            try {
                isCjk = LuceneUtils.isProbablyCjk(reader, readerContents);
            }
            catch (IOException e) {
                LOG.log(Level.SEVERE, "Failed to read stream for tokenization.", e);
                return new EmptyTokenStream();
            }
            reader = new StringReader(readerContents.toString());
            if (isCjk) {
                return new CJKTokenizer(reader);
            }
        }
        WordSeparatorTokenizer tokenStream = new WordSeparatorTokenizer(reader);
        return new StandardFilter((TokenStream)tokenStream);
    }

    public static List<String> tokenList(String tokenizeString) {
        WordSeparatorAnalyzer analyzer = new WordSeparatorAnalyzer();
        TokenStream stream = analyzer.tokenStream("", new StringReader(tokenizeString));
        TermAttribute tokenTerm = (TermAttribute)stream.addAttribute(TermAttribute.class);
        ArrayList<String> tokens = new ArrayList<String>();
        try {
            while (stream.incrementToken()) {
                String term = tokenTerm.term();
                tokens.add(term);
            }
        }
        catch (IOException e) {
            return new ArrayList<String>();
        }
        return tokens;
    }

    public static String normalize(String tokenizeString) {
        StringBuilder builder = new StringBuilder();
        List<String> tokens = WordSeparatorAnalyzer.tokenList(tokenizeString);
        for (int i = 0; i < tokens.size(); ++i) {
            builder.append(tokens.get(i));
            if (i == tokens.size() - 1) continue;
            builder.append(" ");
        }
        return builder.toString();
    }

    public static String removeDiacriticals(String input) {
        return DIACRITICIAL_MARKS.matcher(Normalizer.normalize(input, Normalizer.Form.NFD)).replaceAll("");
    }

    private class WordSeparatorTokenizer
    extends LetterTokenizer {
        public WordSeparatorTokenizer(Reader in) {
            super(in);
        }

        protected char normalize(char c) {
            String cleaned = WordSeparatorAnalyzer.removeDiacriticals(Character.toString(c));
            if (cleaned.isEmpty()) {
                return '\'';
            }
            return Character.toLowerCase(cleaned.charAt(0));
        }

        protected boolean isTokenChar(char c) {
            return !LuceneUtils.WORD_SEPARATORS.contains((Object)new Character(c));
        }
    }
}

