/*
 * Decompiled with CFR 0.152.
 */
package gate.creole.tokeniser;

import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ExecutionInterruptedException;
import gate.creole.ResourceInstantiationException;
import gate.creole.tokeniser.DFSMState;
import gate.creole.tokeniser.FSMState;
import gate.creole.tokeniser.InvalidRuleException;
import gate.creole.tokeniser.TokeniserException;
import gate.creole.tokeniser.UnicodeType;
import gate.util.BomStrippingInputStreamReader;
import gate.util.Err;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.LuckyException;
import java.io.BufferedReader;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.util.AbstractCollection;
import java.util.AbstractSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;

public class SimpleTokeniser
extends AbstractLanguageAnalyser {
    // Name constants for this resource's parameters; not referenced elsewhere in this file
    // (presumably used by callers that set parameters by name -- verify against callers).
    public static final String SIMP_TOK_DOCUMENT_PARAMETER_NAME = "document";
    public static final String SIMP_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
    public static final String SIMP_TOK_RULES_URL_PARAMETER_NAME = "rulesURL";
    public static final String SIMP_TOK_ENCODING_PARAMETER_NAME = "encoding";
    // Debug switch; not read anywhere in this file.
    private static final boolean DEBUG = false;
    // Feature map exposed through getFeatures()/setFeatures().
    protected FeatureMap features = null;
    // Name of the annotation set execute() writes to; null or "" selects the document's default set.
    protected String annotationSetName;
    // Start state of the non-deterministic automaton; created in init(), extended by parseRule().
    protected FSMState initialState;
    // States rendered by getFSMgml(); never populated in this file
    // (presumably FSMState registers itself on construction -- TODO confirm).
    protected Set fsmStates = new HashSet();
    // Start state of the deterministic automaton; assigned by eliminateVoidTransitions(), walked by execute().
    protected DFSMState dInitialState;
    // States rendered by getDFSMgml(); never populated in this file
    // (presumably DFSMState registers itself on construction -- TODO confirm).
    protected Set dfsmStates = new HashSet();
    // Token that separates a rule's left-hand side from its right-hand side (">", set in the static initializer).
    static String LHStoRHS;
    // Tokens skipped while parsing a rule's left-hand side (" ", "\t", "\f"; set in the static initializer).
    static Set ignoreTokens;
    // Maps a java.lang.Character byte-constant value (Integer) to the internal type index (Integer).
    public static Map typeIds;
    // Number of candidate Character fields minus one; used as the exclusive upper bound when
    // eliminateVoidTransitions() loops over type indices.
    public static int maxTypeId;
    // Internal type index -> Character constant name; slots past the last assigned index stay null.
    public static String[] typeMnemonics;
    // Maps a Character constant name (String) to the internal type index (Integer).
    public static Map stringTypeIds;
    // Assigned in the static initializer; not read anywhere in this file.
    protected static String defaultResourceName;
    // Plain bean property (see getRulesResourceName/setRulesResourceName); not read by init().
    private String rulesResourceName;
    // Location the rules are read from in init().
    private URL rulesURL;
    // Character encoding passed to the rules reader in init().
    private String encoding;
    // Not used anywhere in this file.
    private transient Vector progressListeners;
    // Work map used by eliminateVoidTransitions(): Set of FSMState -> the DFSMState that represents it.
    protected transient Map newStates = new HashMap();

    /**
     * Reads the rules from {@code rulesURL}, builds the non-deterministic
     * automaton from them and then derives the deterministic one.
     *
     * <p>A physical line ending in a backslash is joined with the following
     * line (the backslash itself is dropped) before the rule is parsed.
     *
     * @return this resource
     * @throws ResourceInstantiationException if no rules URL was set, the rules
     *         cannot be read, or a rule cannot be parsed
     */
    public Resource init() throws ResourceInstantiationException {
        BufferedReader rulesReader = null;
        try {
            if (this.rulesURL == null) {
                throw new ResourceInstantiationException("No URL provided for the rules!");
            }
            rulesReader = new BufferedReader(
                    new BomStrippingInputStreamReader(this.rulesURL.openStream(), this.encoding));
            this.initialState = new FSMState(this);
            StringBuffer pendingRule = new StringBuffer(1024);
            // NOTE(review): if the last line of the file ends in a backslash, the text
            // accumulated in pendingRule is never handed to parseRule().
            for (String line = rulesReader.readLine(); line != null; line = rulesReader.readLine()) {
                if (line.endsWith("\\")) {
                    // continuation: keep accumulating, without the trailing backslash
                    pendingRule.append(line.substring(0, line.length() - 1));
                } else {
                    pendingRule.append(line);
                    this.parseRule(pendingRule.toString());
                    pendingRule.setLength(0);
                }
            }
            this.eliminateVoidTransitions();
        }
        catch (IOException iOException) {
            throw new ResourceInstantiationException(iOException);
        }
        catch (TokeniserException tokeniserException) {
            throw new ResourceInstantiationException(tokeniserException);
        }
        finally {
            // Always release the underlying URL stream, including on parse failures.
            if (rulesReader != null) {
                try {
                    rulesReader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the rules were already consumed.
                }
            }
        }
        return this;
    }

    /**
     * Drops the reference to the current document; called at the end of
     * {@link #execute()}.
     */
    public void reset() {
        document = null;
    }

    /**
     * Parses one (already line-joined) rule and adds it to the
     * non-deterministic automaton. Lines starting with {@code #} or
     * {@code //} are comments and are ignored.
     *
     * @param string the rule text
     * @throws TokeniserException if the left-hand side cannot be parsed
     */
    void parseRule(String string) throws TokeniserException {
        boolean isComment = string.startsWith("#") || string.startsWith("//");
        if (isComment) {
            return;
        }
        StringTokenizer tokens = new StringTokenizer(string, "()+*|\" \t\f>", true);
        // Each rule hangs off the shared initial state through a void (null) transition.
        FSMState ruleStart = new FSMState(this);
        this.initialState.put(null, ruleStart);
        FSMState ruleEnd = this.parseLHS(ruleStart, tokens, LHStoRHS);
        if (!tokens.hasMoreTokens()) {
            return;
        }
        // Everything up to the next form feed is taken verbatim as the right-hand side.
        String rhs = tokens.nextToken("\f");
        if (rhs.length() > 0) {
            ruleEnd.setRhs(rhs);
        }
    }

    /**
     * Parses a left-hand-side expression and wires the corresponding states
     * and transitions into the automaton, starting at {@code fSMState}.
     *
     * <p>Handled constructs: a bare or double-quoted type name, a
     * parenthesised sub-expression (parsed recursively up to {@code ")"}),
     * alternation with {@code |}, and the postfix operators {@code +} and
     * {@code *}.
     *
     * @param fSMState state the expression starts from
     * @param stringTokenizer source of rule tokens
     * @param string the token that terminates this expression
     * @return the state reached after the expression; {@code fSMState} itself
     *         when no significant token is left
     * @throws TokeniserException if a type name is unknown or the rule ends
     *         before the terminating token is seen
     */
    FSMState parseLHS(FSMState fSMState, StringTokenizer stringTokenizer, String string) throws TokeniserException {
        FSMState current = fSMState;
        boolean inAlternation = false;
        LinkedList<FSMState> alternatives = new LinkedList<FSMState>();
        String token = SimpleTokeniser.skipIgnoreTokens(stringTokenizer);
        if (token == null) {
            return current;
        }
        while (!token.equals(string)) {
            FSMState next;
            if (token.equals("(")) {
                next = this.parseLHS(current, stringTokenizer, ")");
            } else if (token.equals("\"")) {
                next = this.addTypeTransition(current, this.parseQuotedString(stringTokenizer, "\""));
            } else {
                next = this.addTypeTransition(current, token);
            }

            token = SimpleTokeniser.skipIgnoreTokens(stringTokenizer);
            if (token == null) {
                throw new InvalidRuleException("Tokeniser rule ended too soon!");
            }

            if (token.equals("|")) {
                // Remember this branch; the next operand starts again from 'current'.
                inAlternation = true;
                alternatives.add(next);
                token = SimpleTokeniser.skipIgnoreTokens(stringTokenizer);
                if (token == null) {
                    throw new InvalidRuleException("Tokeniser rule ended too soon!");
                }
                continue;
            }
            if (inAlternation) {
                // Last branch of the alternation: join all branches into one fresh state.
                inAlternation = false;
                alternatives.add(next);
                next = new FSMState(this);
                for (FSMState branchEnd : alternatives) {
                    branchEnd.put(null, next);
                }
                alternatives.clear();
            }

            boolean oneOrMore = token.equals("+");
            boolean zeroOrMore = !oneOrMore && token.equals("*");
            if (oneOrMore || zeroOrMore) {
                if (zeroOrMore) {
                    // '*' may skip the operand entirely.
                    current.put(null, next);
                }
                // Loop back so the operand can repeat, then continue from a fresh state.
                next.put(null, current);
                current = next;
                next = new FSMState(this);
                current.put(null, next);
                token = SimpleTokeniser.skipIgnoreTokens(stringTokenizer);
                if (token == null) {
                    throw new InvalidRuleException("Tokeniser rule ended too soon!");
                }
            }
            current = next;
        }
        return current;
    }

    /**
     * Creates a new state and links it from {@code from} on the type named
     * {@code typeName} (looked up in {@code stringTypeIds}).
     */
    private FSMState addTypeTransition(FSMState from, String typeName) throws TokeniserException {
        FSMState target = new FSMState(this);
        Integer typeId = (Integer)stringTypeIds.get(typeName);
        if (typeId == null) {
            throw new InvalidRuleException("Invalid type: \"" + typeName + "\"");
        }
        from.put(new UnicodeType(typeId), target);
        return target;
    }

    /**
     * Concatenates tokens up to, but not including, the closing token.
     *
     * @param stringTokenizer source of rule tokens, positioned after the opening quote
     * @param string the token that closes the quoted string
     * @return the quoted text, or {@code null} if the tokenizer is already exhausted
     * @throws TokeniserException if the input ends before the closing token
     */
    String parseQuotedString(StringTokenizer stringTokenizer, String string) throws TokeniserException {
        if (!stringTokenizer.hasMoreElements()) {
            return null;
        }
        StringBuilder quoted = new StringBuilder(1024);
        for (String token = stringTokenizer.nextToken(); !token.equals(string); token = stringTokenizer.nextToken()) {
            quoted.append(token);
            if (!stringTokenizer.hasMoreElements()) {
                throw new InvalidRuleException("Tokeniser rule ended too soon!");
            }
        }
        return quoted.toString();
    }

    /**
     * Returns the next token that is not listed in {@code ignoreTokens}.
     *
     * @param stringTokenizer source of rule tokens
     * @return the next significant token, or {@code null} when the tokenizer
     *         runs out first
     */
    protected static String skipIgnoreTokens(StringTokenizer stringTokenizer) {
        while (stringTokenizer.hasMoreTokens()) {
            String candidate = stringTokenizer.nextToken();
            // Set membership replaces the former element-by-element scan.
            if (!ignoreTokens.contains(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Computes the closure of a set of states under void (null) transitions:
     * the given states plus every state reachable from them by following
     * only {@code nextSet(null)} links.
     *
     * @param set the starting states ({@code FSMState} elements); not modified
     * @return a new set holding the closure
     */
    private AbstractSet lambdaClosure(Set set) {
        LinkedList<FSMState> pending = new LinkedList<FSMState>(set);
        HashSet<FSMState> closure = new HashSet<FSMState>(set);
        while (!pending.isEmpty()) {
            Set voidTargets = pending.removeFirst().nextSet(null);
            if (voidTargets == null) {
                continue;
            }
            for (Object target : voidTargets) {
                FSMState state = (FSMState)target;
                // add() reports whether the state is new; only new states need expanding.
                if (closure.add(state)) {
                    pending.addFirst(state);
                }
            }
        }
        return closure;
    }

    /**
     * Builds the deterministic automaton from the non-deterministic one by
     * subset construction: each deterministic state stands for the void-closure
     * of a set of non-deterministic states ({@code newStates} holds that
     * mapping). The result is rooted at {@code dInitialState}.
     *
     * @throws TokeniserException declared for failures while building a
     *         state's token description
     */
    void eliminateVoidTransitions() throws TokeniserException {
        this.newStates.clear();
        LinkedList<Set> unprocessed = new LinkedList<Set>();

        // Seed with the closure of the initial state.
        DFSMState dState = new DFSMState(this);
        Set seed = new HashSet();
        seed.add(this.initialState);
        Set current = this.lambdaClosure(seed);
        this.newStates.put(current, dState);
        this.adoptFinalStateRhs(current, dState);
        unprocessed.addFirst(current);
        this.dInitialState = dState;

        while (!unprocessed.isEmpty()) {
            current = unprocessed.removeFirst();
            for (int type = 0; type < maxTypeId; ++type) {
                // Union of everything reachable from 'current' on this type.
                Set reachable = new HashSet();
                for (Object member : current) {
                    Set targets = ((FSMState)member).nextSet(type);
                    if (targets != null) {
                        reachable.addAll(targets);
                    }
                }
                if (reachable.isEmpty()) {
                    continue;
                }
                reachable = this.lambdaClosure(reachable);
                dState = (DFSMState)this.newStates.get(reachable);
                if (dState == null) {
                    // First time this state set is seen: create and queue it.
                    dState = new DFSMState(this);
                    unprocessed.add(reachable);
                    this.adoptFinalStateRhs(reachable, dState);
                    this.newStates.put(reachable, dState);
                }
                ((DFSMState)this.newStates.get(current)).put(type, dState);
            }
        }
    }

    /**
     * Copies the right-hand side of the final states in {@code nfaStates} onto
     * {@code dState} (the last one iterated wins), warns when they disagree,
     * and builds the token description if at least one final state was found.
     */
    private void adoptFinalStateRhs(Set nfaStates, DFSMState dState) throws TokeniserException {
        Set<String> rhsSeen = new HashSet<String>();
        boolean hasFinal = false;
        for (Object member : nfaStates) {
            FSMState state = (FSMState)member;
            if (!state.isFinal()) {
                continue;
            }
            String rhs = state.getRhs();
            rhsSeen.add(rhs);
            dState.rhs = rhs;
            hasFinal = true;
        }
        if (rhsSeen.size() > 1) {
            Err.println("Warning, rule clash: " + rhsSeen + "\nSelected last definition: " + dState.rhs);
        }
        if (hasFinal) {
            dState.buildTokenDesc();
        }
    }

    /**
     * Renders the states in {@code fsmStates} as a GML graph description:
     * one {@code node[...]} entry per state (final states are labelled with
     * {@code ,F} and their right-hand side) followed by each state's edges.
     *
     * @return the GML text
     */
    public String getFSMgml() {
        StringBuilder nodes = new StringBuilder(1024);
        StringBuilder edges = new StringBuilder(1024);
        for (Object element : this.fsmStates) {
            FSMState state = (FSMState)element;
            int index = state.getIndex();
            nodes.append("node[ id ").append(index).append(" label \"").append(index);
            if (state.isFinal()) {
                nodes.append(",F\\n").append(state.getRhs());
            }
            nodes.append("\"  ]\n");
            edges.append(state.getEdgesGML());
        }
        return "graph[ \ndirected 1\n" + nodes + edges + "]\n";
    }

    /**
     * Renders the states in {@code dfsmStates} as a GML graph description:
     * one {@code node[...]} entry per state (final states are labelled with
     * {@code ,F} and their right-hand side) followed by each state's edges.
     *
     * @return the GML text
     */
    public String getDFSMgml() {
        StringBuilder nodes = new StringBuilder(1024);
        StringBuilder edges = new StringBuilder(1024);
        for (Object element : this.dfsmStates) {
            DFSMState state = (DFSMState)element;
            int index = state.getIndex();
            nodes.append("node[ id ").append(index).append(" label \"").append(index);
            if (state.isFinal()) {
                nodes.append(",F\\n").append(state.getRhs());
            }
            nodes.append("\"  ]\n");
            edges.append(state.getEdgesGML());
        }
        return "graph[ \ndirected 1\n" + nodes + edges + "]\n";
    }

    /**
     * @return the feature map held by this tokeniser; {@code null} until one is set
     */
    public FeatureMap getFeatures() {
        return features;
    }

    /**
     * Replaces the feature map held by this tokeniser.
     *
     * @param featureMap the new feature map
     */
    public void setFeatures(FeatureMap featureMap) {
        features = featureMap;
    }

    /**
     * Tokenises the current document by walking its text through the
     * deterministic automaton and adding one annotation per longest match.
     *
     * <p>When the automaton has no transition for a character, the longest
     * match seen since the token start is emitted and scanning restarts just
     * after it. If nothing matched, the single character at the token start is
     * emitted as a {@code DEFAULT_TOKEN} of type {@code UNKNOWN}.
     *
     * <p>Progress is reported, and interruption checked, roughly every 256
     * characters. The document reference is cleared on normal completion.
     *
     * <p>NOTE(review): when the text ends, only the span up to the last match
     * is emitted. Any characters scanned after that match (or since the token
     * start, if nothing matched) receive no annotation. This behaviour is kept
     * as-is.
     *
     * @throws ExecutionException if no document is set, or (as
     *         {@link ExecutionInterruptedException}) when interrupted
     */
    public void execute() throws ExecutionException {
        this.interrupted = false;
        if (this.document == null) {
            throw new ExecutionException("No document to tokenise!");
        }
        AnnotationSet annotationSet = this.annotationSetName == null || this.annotationSetName.equals("")
                ? this.document.getAnnotations()
                : this.document.getAnnotations(this.annotationSetName);
        this.fireStatusChanged("Tokenising " + this.document.getName() + "...");

        String content = this.document.getContent().toString();
        int length = content.length();
        DFSMState graphPosition = this.dInitialState;
        int tokenStart = 0;
        int lastMatch = -1;
        DFSMState lastMatchingState = null;
        int charIdx = 0;
        int lastReported = 0;

        while (charIdx < length) {
            char currentChar = content.charAt(charIdx);
            DFSMState nextState = graphPosition.next(
                    (Integer)typeIds.get(Integer.valueOf(Character.getType(currentChar))));
            if (nextState != null) {
                graphPosition = nextState;
                if (graphPosition.isFinal()) {
                    lastMatch = charIdx;
                    lastMatchingState = graphPosition;
                }
                ++charIdx;
            } else {
                if (lastMatchingState == null) {
                    // Nothing matched from tokenStart: emit that one character as UNKNOWN.
                    String tokenString = content.substring(tokenStart, tokenStart + 1);
                    FeatureMap unknownFeatures = Factory.newFeatureMap();
                    unknownFeatures.put("type", "UNKNOWN");
                    unknownFeatures.put("string", tokenString);
                    unknownFeatures.put("length", Integer.toString(tokenString.length()));
                    try {
                        annotationSet.add(Long.valueOf(tokenStart), Long.valueOf(tokenStart + 1), "DEFAULT_TOKEN", unknownFeatures);
                    }
                    catch (InvalidOffsetException invalidOffsetException) {
                        // Kept best-effort: offsets come from the content itself.
                        invalidOffsetException.printStackTrace(Err.getPrintWriter());
                    }
                    charIdx = tokenStart + 1;
                } else {
                    this.addMatchedToken(annotationSet, content, tokenStart, lastMatch + 1, lastMatchingState);
                    charIdx = lastMatch + 1;
                }
                lastMatchingState = null;
                graphPosition = this.dInitialState;
                tokenStart = charIdx;
            }

            if (charIdx - lastReported > 256) {
                // long arithmetic: 100 * charIdx overflows int on very large documents
                this.fireProgressChanged((int)(100L * charIdx / length));
                lastReported = charIdx;
                if (this.isInterrupted()) {
                    throw new ExecutionInterruptedException();
                }
            }
        }

        if (lastMatchingState != null) {
            this.addMatchedToken(annotationSet, content, tokenStart, lastMatch + 1, lastMatchingState);
        }
        this.reset();
        this.fireProcessFinished();
        this.fireStatusChanged("Tokenisation complete!");
    }

    /**
     * Adds the annotation for a matched token covering {@code [start, end)},
     * taking the annotation type from row 0 of the state's token description
     * and the extra features from the remaining rows.
     */
    private void addMatchedToken(AnnotationSet annotationSet, String content, int start, int end, DFSMState matched) {
        String tokenString = content.substring(start, end);
        FeatureMap tokenFeatures = Factory.newFeatureMap();
        tokenFeatures.put("string", tokenString);
        tokenFeatures.put("length", Integer.toString(tokenString.length()));
        String[][] tokenDesc = matched.getTokenDesc();
        for (int i = 1; i < tokenDesc.length; ++i) {
            tokenFeatures.put(tokenDesc[i][0], tokenDesc[i][1]);
        }
        try {
            annotationSet.add(Long.valueOf(start), Long.valueOf(end), tokenDesc[0][0], tokenFeatures);
        }
        catch (InvalidOffsetException invalidOffsetException) {
            throw new GateRuntimeException(invalidOffsetException.toString());
        }
    }

    /**
     * Sets the location the rules are read from by {@link #init()}.
     *
     * @param uRL the rules location
     */
    public void setRulesURL(URL uRL) {
        rulesURL = uRL;
    }

    /**
     * @return the location the rules are read from; {@code null} if none was set
     */
    public URL getRulesURL() {
        return rulesURL;
    }

    /**
     * Sets the name of the annotation set that {@link #execute()} adds tokens
     * to; {@code null} or an empty string selects the document's default set.
     *
     * @param string the annotation set name
     */
    public void setAnnotationSetName(String string) {
        annotationSetName = string;
    }

    /**
     * @return the name of the annotation set tokens are added to; may be {@code null}
     */
    public String getAnnotationSetName() {
        return annotationSetName;
    }

    /**
     * Stores the rules resource name. The value is only held here; nothing
     * else in this class reads it.
     *
     * @param string the rules resource name
     */
    public void setRulesResourceName(String string) {
        rulesResourceName = string;
    }

    /**
     * @return the stored rules resource name; may be {@code null}
     */
    public String getRulesResourceName() {
        return rulesResourceName;
    }

    /**
     * Sets the character encoding used by {@link #init()} to read the rules.
     *
     * @param string the encoding name
     */
    public void setEncoding(String string) {
        encoding = string;
    }

    /**
     * @return the character encoding used to read the rules; may be {@code null}
     */
    public String getEncoding() {
        return encoding;
    }

    static {
        Field[] fieldArray;
        LHStoRHS = ">";
        defaultResourceName = "creole/tokeniser/DefaultTokeniser.rules";
        try {
            fieldArray = Class.forName("java.lang.Character").getFields();
        }
        catch (ClassNotFoundException classNotFoundException) {
            throw new LuckyException("Could not find the java.lang.Character class!");
        }
        LinkedList<Field> linkedList = new LinkedList<Field>();
        for (int i = 0; i < fieldArray.length; ++i) {
            if (!Modifier.isStatic(fieldArray[i].getModifiers()) || fieldArray[i].getName().indexOf("DIRECTIONALITY") != -1) continue;
            linkedList.add(fieldArray[i]);
        }
        typeIds = new HashMap();
        maxTypeId = linkedList.size() - 1;
        typeMnemonics = new String[maxTypeId + 1];
        stringTypeIds = new HashMap();
        Iterator iterator = linkedList.iterator();
        int n = 0;
        try {
            while (iterator.hasNext()) {
                Field field = (Field)iterator.next();
                if (!field.getType().toString().equals("byte")) continue;
                String string = field.getName();
                typeIds.put(new Integer(field.getInt(null)), new Integer(n));
                SimpleTokeniser.typeMnemonics[n] = string;
                stringTypeIds.put(string, new Integer(n));
                ++n;
            }
        }
        catch (Exception exception) {
            throw new LuckyException(exception.toString());
        }
        ignoreTokens = new HashSet();
        ignoreTokens.add(" ");
        ignoreTokens.add("\t");
        ignoreTokens.add("\f");
    }
}

