/*
 * Decompiled with CFR 0.152.
 */
package gate.twitter;

import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvValidationException;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.ResourceReference;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.zip.GZIPInputStream;
import pt.tumba.spell.LevenshteinDistance;
import pt.tumba.spell.SpellChecker;

@CreoleResource(name="Tweet Normaliser", comment="Normalise texts in tweets (convert into standard English spelling mistakes, colloquialisms, typing variations and so on)", helpURL="http://gate.ac.uk/userguide/sec:social:twitter:prs")
public class Normaliser
extends AbstractLanguageAnalyser {
    private static final long serialVersionUID = -4139489923193104429L;
    /** Mapping from a term to its replacement, filled by {@code readOrthMappings}; looked up with the lower-cased token text in {@code execute}. */
    protected HashMap<String, String> orthmappings;
    /** First {@code " : "}-separated field of every dictionary line; tokens whose text is in this set are left untouched by {@code execute}. */
    protected HashSet<String> wordlist;
    /** Spell checker initialised from the dictionary in {@code init}; asked for the most similar word to a token. */
    protected SpellChecker checker;
    /** Distance implementation created in {@code init}; its result is compared against {@code maxDistance}. */
    protected LevenshteinDistance dist;
    /** Name of the annotation set that receives the features; {@code null} is treated like the empty name. */
    private String outputASName;
    /** Name of the annotation set from which {@code Token} annotations are read. */
    private String inputASName;
    /** Name of the Token feature holding the text to normalise. */
    private String initialTextFeature;
    /** Name of the feature under which the normalised text is stored. */
    private String normTextFeature;
    /** Name of the feature under which the pre-normalisation text is stored. */
    private String origTextFeature;
    /** Location of the dictionary; must be non-null when {@code init} runs. */
    private ResourceReference dictURL;
    /** Location of the orthographic mapping list; must be non-null when {@code init} runs. */
    private ResourceReference orthURL;
    /** Character encoding used when reading the dictionary. */
    private String dictEncoding;
    /** Character encoding used when reading the orthographic mapping files. */
    private String orthEncoding;
    /** Upper bound (exclusive) on the distance at which a spell-checker suggestion is accepted. */
    private double maxDistance;

    /**
     * Loads the dictionary and the orthographic mappings this resource needs.
     *
     * <p>The dictionary is opened twice: once to initialise the spell checker and
     * once to collect the first {@code " : "}-separated field of every line into
     * {@code wordlist}. Both passes go through {@link #openPossiblyGzip(URL)}, and
     * every stream is closed by try-with-resources.
     *
     * @return this resource
     * @throws ResourceInstantiationException if {@code orthURL} or {@code dictURL} is
     *         not set, or if any of the files cannot be read
     */
    public Resource init() throws ResourceInstantiationException {
        if (this.orthURL == null) {
            throw new ResourceInstantiationException("orth norm file not set");
        }
        if (this.dictURL == null) {
            throw new ResourceInstantiationException("dict file not set");
        }
        this.checker = new SpellChecker();
        this.wordlist = new HashSet<>();
        this.orthmappings = new HashMap<>();

        // Pass 1: hand the dictionary to the spell checker.
        try (InputStream in = this.openPossiblyGzip(this.dictURL.toURL());
             Reader dictReader = new BufferedReader(new InputStreamReader(in, this.dictEncoding))) {
            this.checker.initialize(dictReader);
        }
        catch (Exception e) {
            throw new ResourceInstantiationException("Error initializing spellchecker", e);
        }

        // Pass 2: collect the head word of every dictionary line.
        try (InputStream in = this.openPossiblyGzip(this.dictURL.toURL());
             BufferedReader dictReader = new BufferedReader(new InputStreamReader(in, this.dictEncoding))) {
            String entry;
            while ((entry = dictReader.readLine()) != null) {
                String[] tokens = entry.split(" : ");
                // A line consisting only of the separator splits to an empty array.
                if (tokens.length > 0) {
                    this.wordlist.add(tokens[0]);
                }
            }
        }
        catch (Exception e) {
            throw new ResourceInstantiationException("Error loading dictionary word list", e);
        }

        try {
            this.readOrthMappings(this.orthURL.toURL());
        }
        catch (IOException e) {
            throw new ResourceInstantiationException("Error loading orth mappings", e);
        }
        this.dist = new LevenshteinDistance();
        return this;
    }

    /**
     * Reads orthographic mappings from the CSV file at {@code url} into
     * {@code orthmappings}.
     *
     * <p>The first row decides how the file is interpreted. If it has fewer than two
     * columns, every row is taken as a path relative to {@code url} naming another
     * file, which is read recursively (blank rows are skipped). Otherwise each row
     * maps column one to column two.
     *
     * <p>NOTE(review): include lists are followed without cycle detection, so a list
     * that (indirectly) names itself would recurse without bound.
     *
     * @param url location of the CSV file; opened via {@link #openPossiblyGzip(URL)}
     * @throws IOException if the file cannot be read, fails CSV validation, or a
     *         non-blank row of a mapping file has fewer than two columns
     */
    protected void readOrthMappings(URL url) throws IOException {
        try (InputStream in = this.openPossiblyGzip(url);
             Reader reader = new InputStreamReader(in, this.orthEncoding);
             CSVReader csvReader = new CSVReader(reader)) {
            String[] line = csvReader.readNext();
            if (line == null) {
                return;
            }
            // The shape of the first row fixes the mode for the whole file.
            boolean includeList = line.length < 2;
            for (; line != null; line = csvReader.readNext()) {
                boolean blank = line.length == 0 || (line.length == 1 && line[0].trim().isEmpty());
                if (includeList) {
                    if (!blank) {
                        this.readOrthMappings(new URL(url, line[0].trim()));
                    }
                } else if (line.length >= 2) {
                    this.orthmappings.put(line[0], line[1]);
                } else if (!blank) {
                    // Previously surfaced as an unchecked ArrayIndexOutOfBoundsException.
                    throw new IOException("Malformed orth mapping row in " + url
                            + ": expected 2 columns but found " + line.length);
                }
            }
        }
        catch (CsvValidationException e) {
            throw new IOException("Problem reading orth mapping CSV file", e);
        }
    }

    /**
     * Opens a stream on {@code url}, transparently decompressing it when the URL
     * path ends in {@code .gz}.
     *
     * @param url the location to open
     * @return the raw stream, or a {@link GZIPInputStream} wrapping it for
     *         {@code .gz} paths; the caller is responsible for closing it
     * @throws IOException if the URL cannot be opened or the gzip header is invalid
     */
    protected InputStream openPossiblyGzip(URL url) throws IOException {
        boolean gzipped = url.getPath().endsWith(".gz");
        InputStream raw = url.openStream();
        if (!gzipped) {
            return raw;
        }
        try {
            return new GZIPInputStream(raw);
        }
        catch (IOException | RuntimeException e) {
            // The wrapper was never created, so nobody else will close the raw stream.
            try {
                raw.close();
            }
            catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Normalises every {@code Token} annotation of kind {@code word} in the input
     * annotation set of the current document.
     *
     * @throws ExecutionException if no document has been set
     */
    public void execute() throws ExecutionException {
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        this.fireStatusChanged("Normalising " + this.document.getName());
        AnnotationSet inputAS = this.document.getAnnotations(this.inputASName);
        AnnotationSet tokensAS = inputAS.get("Token");
        if (tokensAS != null && !tokensAS.isEmpty()) {
            // Iterate over a snapshot of the tokens rather than the set itself.
            List<Annotation> tokenList = new ArrayList<>(tokensAS);
            for (Annotation ann : tokenList) {
                this.normaliseToken(ann);
            }
        }
        this.fireProcessFinished();
    }

    /**
     * Applies the normalisation rules to a single token.
     *
     * <p>Order of checks: tokens that are not of kind {@code word}, or that lack a
     * string value in the initial-text feature, are ignored. A token whose
     * lower-cased text has an orthographic mapping gets that mapping. A token whose
     * text is in the dictionary word list, or whose {@code orth} feature is missing,
     * not a string, {@code upperInitial} or {@code invalid}, is left alone.
     * Otherwise the spell checker's most similar word is used when it is closer
     * than {@code maxDistance} and differs from the token by more than case.
     *
     * @param ann the token annotation to inspect
     */
    private void normaliseToken(Annotation ann) {
        FeatureMap tokenFeatures = ann.getFeatures();
        // Constant-first equals tolerates a missing or non-string "kind" feature.
        if (!"word".equals(tokenFeatures.get("kind"))) {
            return;
        }
        Object textValue = tokenFeatures.get(this.initialTextFeature);
        if (!(textValue instanceof String)) {
            return;
        }
        String initialText = (String) textValue;
        // Locale.ROOT keeps the lookup key independent of the JVM default locale.
        String initialLower = initialText.toLowerCase(Locale.ROOT);

        if (this.orthmappings.containsKey(initialLower)) {
            // NOTE(review): the lower-cased form, not the text as written, is stored as
            // the "original" here, unlike the spell-checker branch below — confirm intent.
            this.addFeatures(ann, this.origTextFeature, initialLower);
            this.addFeatures(ann, this.normTextFeature, this.orthmappings.get(initialLower));
            return;
        }
        if (this.wordlist.contains(initialText)) {
            return;
        }

        Object orth = tokenFeatures.get("orth");
        if (!(orth instanceof String) || "upperInitial".equals(orth) || "invalid".equals(orth)) {
            return;
        }

        this.addFeatures(ann, this.origTextFeature, initialText);
        String mostSimilar = this.checker.findMostSimilar(initialText);
        if (mostSimilar == null) {
            return;
        }
        String normalisedText = initialText;
        boolean closeEnough = this.dist.modifiedLevenshteinDistance(initialText, mostSimilar) < this.maxDistance;
        if (closeEnough && !initialLower.equals(mostSimilar.toLowerCase(Locale.ROOT))) {
            normalisedText = mostSimilar;
        }
        this.addFeatures(ann, this.normTextFeature, normalisedText);
    }

    /**
     * Stores {@code featureName = featureValue} for the token {@code annot}.
     *
     * <p>When the input and output annotation set names are the same ({@code null}
     * counts as the empty name) the feature is written directly onto {@code annot}.
     * Otherwise the output set is searched for a {@code Token} annotation with the
     * same start and end offsets: if one exists it receives the feature, if not a
     * new {@code Token} annotation carrying only this feature is created, exactly
     * once.
     *
     * @param annot the input token the feature belongs to
     * @param featureName name of the feature to set
     * @param featureValue value of the feature
     * @throws GateRuntimeException if the new annotation cannot be added to the
     *         output set; the underlying failure is attached as the cause
     */
    protected void addFeatures(Annotation annot, String featureName, String featureValue) throws GateRuntimeException {
        String inputName = this.inputASName == null ? "" : this.inputASName;
        String outputName = this.outputASName == null ? "" : this.outputASName;
        if (inputName.equals(outputName)) {
            annot.getFeatures().put(featureName, featureValue);
            return;
        }

        Long start = annot.getStartNode().getOffset();
        Long end = annot.getEndNode().getOffset();
        AnnotationSet outputAS = this.outputASName == null
                ? this.document.getAnnotations()
                : this.document.getAnnotations(this.outputASName);

        // Reuse an existing output token that covers exactly the same span.
        AnnotationSet existingTokens = outputAS.get("Token");
        if (existingTokens != null) {
            for (Annotation candidate : existingTokens) {
                if (start.equals(candidate.getStartNode().getOffset())
                        && end.equals(candidate.getEndNode().getOffset())) {
                    candidate.getFeatures().put(featureName, featureValue);
                    return;
                }
            }
        }

        // No matching token in the output set: create one.
        FeatureMap features = Factory.newFeatureMap();
        features.put(featureName, featureValue);
        try {
            outputAS.add(start, end, "Token", features);
        }
        catch (Exception e) {
            throw new GateRuntimeException("Invalid Offsets", e);
        }
    }

    /**
     * Re-initialises this resource by running {@link #init()} again, which rebuilds
     * the spell checker, word list and orthographic mappings from their files.
     *
     * @throws ResourceInstantiationException if {@link #init()} fails
     */
    public void reInit() throws ResourceInstantiationException {
        this.init();
    }

    /**
     * Sets the name of the annotation set from which {@code Token} annotations are
     * read. Declared as an optional runtime parameter with an empty default.
     *
     * @param inputASName the input annotation set name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Input annotation set name", defaultValue="")
    public void setInputASName(String inputASName) {
        this.inputASName = inputASName;
    }

    /**
     * @return the input annotation set name
     */
    public String getInputASName() {
        return this.inputASName;
    }

    /**
     * Sets the name of the annotation set that receives the normalisation features.
     * Declared as an optional runtime parameter with an empty default.
     *
     * @param outputASName the output annotation set name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Output annotation set name", defaultValue="")
    public void setOutputASName(String outputASName) {
        this.outputASName = outputASName;
    }

    /**
     * @return the output annotation set name
     */
    public String getOutputASName() {
        return this.outputASName;
    }

    /**
     * Sets the name of the Token feature whose value is the text to normalise.
     * Declared as an optional runtime parameter defaulting to {@code string}.
     *
     * @param f the feature name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Feature on Token annotations in the input AS that contains the token string", defaultValue="string")
    public void setInitialTextFeature(String f) {
        this.initialTextFeature = f;
    }

    /**
     * @return the name of the feature holding the text to normalise
     */
    public String getInitialTextFeature() {
        return this.initialTextFeature;
    }

    /**
     * Sets the name of the feature under which the normalised text is stored.
     * Declared as an optional runtime parameter defaulting to {@code string}.
     *
     * @param f the feature name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Feature to which the normalised text should be saved", defaultValue="string")
    public void setNormTextFeature(String f) {
        this.normTextFeature = f;
    }

    /**
     * @return the name of the feature receiving the normalised text
     */
    public String getNormTextFeature() {
        return this.normTextFeature;
    }

    /**
     * Sets the name of the feature under which the pre-normalisation text is stored.
     * Declared as an optional runtime parameter defaulting to {@code origString}.
     *
     * @param f the feature name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Feature to which the original text should be saved", defaultValue="origString")
    public void setOrigTextFeature(String f) {
        this.origTextFeature = f;
    }

    /**
     * @return the name of the feature receiving the original text
     */
    public String getOrigTextFeature() {
        return this.origTextFeature;
    }

    /**
     * Sets the location of the dictionary that {@code init} feeds to the spell
     * checker and scans for the word list.
     *
     * @param dictURL reference to the dictionary file
     */
    @CreoleParameter(comment="Path to JaSpell dictionary", defaultValue="resources/normaliser/english.jaspell")
    public void setDictURL(ResourceReference dictURL) {
        this.dictURL = dictURL;
    }

    /**
     * @return the reference to the dictionary file
     */
    public ResourceReference getDictURL() {
        return this.dictURL;
    }

    /**
     * Sets the character encoding used when reading the dictionary file.
     *
     * @param encoding the charset name
     */
    @CreoleParameter(comment="Character encoding used to read the dictionary file", defaultValue="UTF-8")
    public void setDictEncoding(String encoding) {
        this.dictEncoding = encoding;
    }

    /**
     * @return the charset name used for the dictionary file
     */
    public String getDictEncoding() {
        return this.dictEncoding;
    }

    /**
     * Sets the location of the orthographic mapping list read by
     * {@code readOrthMappings}: either a two-column mapping file or a one-column
     * list of relative paths to further files.
     *
     * @param orthURL reference to the mapping list
     */
    @CreoleParameter(comment="Path to common normalisation terms list (for orthographic mappings, e.g. 'b4' to 'before').  This can either be a single two-column CSV file where the first column is the term to be mapped and the second column is the target, or a single column file listing relative paths to other lists (which is useful if you want to include more than one list of terms)", defaultValue="resources/normaliser/orth.en.csv")
    public void setOrthURL(ResourceReference orthURL) {
        this.orthURL = orthURL;
    }

    /**
     * @return the reference to the orthographic mapping list
     */
    public ResourceReference getOrthURL() {
        return this.orthURL;
    }

    /**
     * Sets the character encoding used when reading the orthographic mapping files.
     *
     * @param encoding the charset name
     */
    @CreoleParameter(comment="Character encoding used to read the orth files", defaultValue="UTF-8")
    public void setOrthEncoding(String encoding) {
        this.orthEncoding = encoding;
    }

    /**
     * @return the charset name used for the orthographic mapping files
     */
    public String getOrthEncoding() {
        return this.orthEncoding;
    }

    /**
     * Sets the distance threshold below which a spell-checker suggestion replaces
     * the token text. Declared as an optional runtime parameter defaulting to
     * {@code 2.0}.
     *
     * <p>The value is parsed straight to {@code double}, so it is not rounded
     * through {@code float} on the way in.
     *
     * @param maxDistance decimal representation of the threshold
     * @throws NumberFormatException if {@code maxDistance} is not a parsable number
     * @throws NullPointerException if {@code maxDistance} is {@code null}
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Maximum distance to consider (this determines OOV/IV threshold).\nBased on Levenshtein edit dist (with a case change downweighted to 0.5) and double-metaphone.", defaultValue="2.0")
    public void setMaxDistance(String maxDistance) {
        this.maxDistance = Double.parseDouble(maxDistance);
    }

    /**
     * @return the distance threshold formatted with {@link Double#toString(double)}
     */
    public String getMaxDistance() {
        return Double.toString(this.maxDistance);
    }
}

