/*
 * Decompiled with CFR 0.152.
 */
package gate.creole;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.ResourceReference;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import hepple.postag.InvalidRuleException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.event.Level;

@CreoleResource(name="ANNIE POS Tagger", helpURL="http://gate.ac.uk/userguide/sec:annie:tagger", comment="Mark Hepple's Brill-style POS tagger", icon="pos-tagger")
public class POSTagger
extends AbstractLanguageAnalyser {
    private static final long serialVersionUID = 7680938864165071808L;

    /** Name of the "document" parameter. */
    public static final String TAG_DOCUMENT_PARAMETER_NAME = "document";

    /** Name of the input annotation set parameter. */
    public static final String TAG_INPUT_AS_PARAMETER_NAME = "inputASName";

    /** Name of the lexicon location parameter. */
    public static final String TAG_LEXICON_URL_PARAMETER_NAME = "lexiconURL";

    /** Name of the ruleset location parameter. */
    public static final String TAG_RULES_URL_PARAMETER_NAME = "rulesURL";

    /** Name of the encoding parameter. */
    public static final String TAG_ENCODING_PARAMETER_NAME = "encoding";

    /** Name of the base token annotation type parameter. */
    public static final String BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME = "baseTokenAnnotationType";

    /** Name of the output annotation type parameter. */
    public static final String OUTPUT_ANNOTATION_TYPE_PARAMETER_NAME = "outputAnnotationType";

    /** Name of the base sentence annotation type parameter. */
    public static final String BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME = "baseSentenceAnnotationType";

    /** Name of the output annotation set parameter. */
    public static final String TAG_OUTPUT_AS_PARAMETER_NAME = "outputASName";

    /** When true, {@code execute()} throws if the document has no sentence or token annotations. */
    protected Boolean failOnMissingInputAnnotations = true;

    /** When true, every token is tagged; otherwise only tokens within the span of a sentence annotation. */
    protected Boolean posTagAllTokens = true;

    /** Logger named after the runtime class of this instance. */
    protected Logger logger = LoggerFactory.getLogger(this.getClass().getName());

    /** The underlying tagger; created in {@code init()}. */
    protected hepple.postag.POSTagger tagger;

    /** Location of the lexicon file. */
    private ResourceReference lexiconURL;

    /** Location of the ruleset file. */
    private ResourceReference rulesURL;

    /** Name of the input annotation set; null selects the document's default set. */
    private String inputASName;

    /** Encoding used for reading rules and lexicons. */
    private String encoding;

    /** Character used to separate a lexicon entry from its list of POS tags. */
    private String separator;

    /** Annotation type of the tokens to be tagged. */
    private String baseTokenAnnotationType;

    /** Annotation type of the sentences that group the tokens. */
    private String baseSentenceAnnotationType;

    /** Annotation type on which the resulting feature is stored. */
    private String outputAnnotationType;

    /** Name of the output annotation set; null selects the document's default set. */
    private String outputASName;

    /**
     * Chooses whether {@code execute()} throws an exception when the document
     * contains none of the required input annotations.
     *
     * @param fail true to throw; false to log and carry on
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Throw an exception when there are none of the required input annotations", defaultValue="true")
    public void setFailOnMissingInputAnnotations(Boolean fail) {
        failOnMissingInputAnnotations = fail;
    }

    /**
     * @return whether missing input annotations cause an exception
     */
    public Boolean getFailOnMissingInputAnnotations() {
        return failOnMissingInputAnnotations;
    }

    /**
     * Chooses between tagging every token and tagging only those tokens that
     * lie within a base sentence annotation.
     *
     * @param allTokens true to tag all tokens
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="Should all Tokens be POS tagged or just those within baseSentenceAnnotationType?", defaultValue="true")
    public void setPosTagAllTokens(Boolean allTokens) {
        posTagAllTokens = allTokens;
    }

    /**
     * @return whether all tokens are tagged, rather than only those within sentences
     */
    public Boolean getPosTagAllTokens() {
        return posTagAllTokens;
    }

    /**
     * Builds the underlying Hepple tagger from the configured lexicon and
     * ruleset locations, encoding and lexicon separator.
     *
     * @return this resource
     * @throws ResourceInstantiationException if the lexicon or rules location
     *         is missing, or if the tagger cannot be constructed from them
     */
    @Override
    public Resource init() throws ResourceInstantiationException {
        if (this.lexiconURL == null) {
            throw new ResourceInstantiationException("No URL provided for the lexicon!");
        }
        if (this.rulesURL == null) {
            throw new ResourceInstantiationException("No URL provided for the rules!");
        }
        try {
            this.tagger = new hepple.postag.POSTagger(this.lexiconURL.toURL(), this.rulesURL.toURL(), this.encoding, this.separator);
        }
        catch (InvalidRuleException | IOException e) {
            // Keep the original failure as the cause so the caller sees what went wrong.
            throw new ResourceInstantiationException(e);
        }
        return this;
    }

    /**
     * POS-tags the tokens of the current document.
     *
     * <p>Sentence and token annotations are read from the input annotation
     * set and sorted by offset. Tokens are grouped by the sentence whose end
     * offset they do not exceed; each group is sent to the tagger and the
     * resulting tag is stored as the {@code "category"} feature through
     * {@link #addFeatures(Annotation, String, String)}. When
     * {@code posTagAllTokens} is true, tokens left over after the last
     * sentence are tagged as one final group.
     *
     * <p>Empty {@code inputASName} / {@code outputASName} values are
     * normalised to null (the default annotation set) as a side effect.
     *
     * @throws ExecutionException if there is no document, a required
     *         annotation type parameter is blank, the tagger returns a
     *         different number of results than tokens supplied, or the input
     *         annotations are missing and
     *         {@code failOnMissingInputAnnotations} is true
     */
    @Override
    public void execute() throws ExecutionException {
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        // An empty set name means "use the default annotation set".
        if (this.inputASName != null && this.inputASName.equals("")) {
            this.inputASName = null;
        }
        AnnotationSet inputAS = this.inputASName == null
                ? this.document.getAnnotations()
                : this.document.getAnnotations(this.inputASName);
        if (this.baseTokenAnnotationType == null || this.baseTokenAnnotationType.trim().length() == 0) {
            throw new ExecutionException("No base Token Annotation Type provided!");
        }
        if (this.outputASName != null && this.outputASName.equals("")) {
            this.outputASName = null;
        }
        if (this.baseSentenceAnnotationType == null || this.baseSentenceAnnotationType.trim().length() == 0) {
            throw new ExecutionException("No base Sentence Annotation Type provided!");
        }
        if (this.outputAnnotationType == null || this.outputAnnotationType.trim().length() == 0) {
            throw new ExecutionException("No AnnotationType provided to store the new feature!");
        }

        AnnotationSet sentencesAS = inputAS.get(this.baseSentenceAnnotationType);
        AnnotationSet tokensAS = inputAS.get(this.baseTokenAnnotationType);
        boolean hasInput = sentencesAS != null && sentencesAS.size() > 0
                && tokensAS != null && tokensAS.size() > 0;
        if (!hasInput) {
            if (this.failOnMissingInputAnnotations.booleanValue()) {
                throw new ExecutionException("No sentences or tokens to process in document " + this.document.getName() + "\nPlease run a sentence splitter and tokeniser first!");
            }
            Utils.logOnce(this.logger, Level.INFO, "POS tagger: no sentence or token annotations in input document - see debug log for details.");
            this.logger.debug("No input annotations in document " + this.document.getName());
            return;
        }

        long startTime = System.currentTimeMillis();
        this.fireStatusChanged("POS tagging " + this.document.getName());
        this.fireProgressChanged(0);
        boolean tagAllTokens = this.posTagAllTokens.booleanValue();

        // The tagger takes a list of sentences; a single reusable sentence
        // buffer is wrapped once and refilled for every group of tokens.
        List<String> sentenceForTagger = new ArrayList<>();
        List<List<String>> sentencesForTagger = new ArrayList<>(1);
        sentencesForTagger.add(sentenceForTagger);

        OffsetComparator offsetComparator = new OffsetComparator();
        List<Annotation> sentencesList = new ArrayList<>(sentencesAS);
        Collections.sort(sentencesList, offsetComparator);
        List<Annotation> tokensList = new ArrayList<>(tokensAS);
        Collections.sort(tokensList, offsetComparator);

        Iterator<Annotation> tokensIter = tokensList.iterator();
        List<Annotation> tokensInCurrentSentence = new ArrayList<>();
        // Safe: tokensAS was checked to be non-empty above.
        Annotation currentToken = tokensIter.next();
        int sentIndex = 0;
        int sentCnt = sentencesAS.size();
        for (Annotation currentSentence : sentencesList) {
            tokensInCurrentSentence.clear();
            sentenceForTagger.clear();
            // Consume every token that ends no later than this sentence ends.
            while (currentToken != null && currentToken.getEndNode().getOffset().compareTo(currentSentence.getEndNode().getOffset()) <= 0) {
                if (tagAllTokens || currentToken.withinSpanOf(currentSentence)) {
                    tokensInCurrentSentence.add(currentToken);
                    sentenceForTagger.add((String)currentToken.getFeatures().get("string"));
                }
                currentToken = tokensIter.hasNext() ? tokensIter.next() : null;
            }
            this.tagAndAnnotate(sentencesForTagger, tokensInCurrentSentence);
            this.fireProgressChanged(sentIndex++ * 100 / sentCnt);
        }

        // Tokens that end after the last sentence are tagged as one extra group.
        if (currentToken != null && tagAllTokens) {
            tokensInCurrentSentence.clear();
            sentenceForTagger.clear();
            while (currentToken != null) {
                tokensInCurrentSentence.add(currentToken);
                sentenceForTagger.add((String)currentToken.getFeatures().get("string"));
                currentToken = tokensIter.hasNext() ? tokensIter.next() : null;
            }
            this.tagAndAnnotate(sentencesForTagger, tokensInCurrentSentence);
        }

        this.fireProcessFinished();
        this.fireStatusChanged(this.document.getName() + " tagged in " + NumberFormat.getInstance().format((double)(System.currentTimeMillis() - startTime) / 1000.0) + " seconds!");
    }

    /**
     * Runs the tagger on the single prepared sentence and stores the tag of
     * each result as the "category" feature for the matching token.
     *
     * @param sentencesForTagger the one-element list wrapping the token strings
     * @param tokens the token annotations, in the same order as the strings
     * @throws ExecutionException if the tagger output size differs from the
     *         number of tokens
     */
    private void tagAndAnnotate(List<List<String>> sentencesForTagger, List<Annotation> tokens) throws ExecutionException {
        List<List<String[]>> taggerOutput = this.tagger.runTagger(sentencesForTagger);
        if (taggerOutput.isEmpty()) {
            // Nothing came back from the tagger; there is nothing to store.
            return;
        }
        List<String[]> taggerResults = taggerOutput.get(0);
        if (taggerResults.size() != tokens.size()) {
            throw new ExecutionException("POS Tagger malfunction: the output size (" + taggerResults.size() + ") is different from the input size (" + tokens.size() + ")!");
        }
        Iterator<Annotation> tokIter = tokens.iterator();
        for (String[] result : taggerResults) {
            // Index 1 of each result entry is the value stored as the category.
            this.addFeatures(tokIter.next(), "category", result[1]);
        }
    }

    /**
     * Stores a feature for the given token annotation.
     *
     * <p>When the output annotation type equals the base token type and the
     * input and output annotation set names match (null and empty being
     * treated alike), the feature is written directly onto {@code annot}.
     * Otherwise the output set is searched for an annotation of the output
     * type with exactly the same start and end offsets: if one exists the
     * feature is set on it, and if none exists a single new annotation
     * carrying the feature is added to the output set.
     *
     * @param annot the token annotation the feature belongs to
     * @param featureName name of the feature to set
     * @param featureValue value of the feature to set
     * @throws GateRuntimeException if the new annotation cannot be added to
     *         the output set
     */
    protected void addFeatures(Annotation annot, String featureName, String featureValue) throws GateRuntimeException {
        String tempIASN = this.inputASName == null ? "" : this.inputASName;
        String tempOASN = this.outputASName == null ? "" : this.outputASName;
        if (this.outputAnnotationType.equals(this.baseTokenAnnotationType) && tempIASN.equals(tempOASN)) {
            annot.getFeatures().put(featureName, featureValue);
            return;
        }

        Long start = annot.getStartNode().getOffset();
        Long end = annot.getEndNode().getOffset();
        AnnotationSet outputAS = this.outputASName == null
                ? this.document.getAnnotations()
                : this.document.getAnnotations(this.outputASName);
        AnnotationSet annotations = outputAS.get(this.outputAnnotationType);

        // Reuse an existing annotation with identical offsets, if there is one.
        if (annotations != null && annotations.size() > 0) {
            List<Annotation> candidates = new ArrayList<>(annotations.get());
            for (Annotation candidate : candidates) {
                if (candidate.getStartNode().getOffset().equals(start) && candidate.getEndNode().getOffset().equals(end)) {
                    candidate.getFeatures().put(featureName, featureValue);
                    return;
                }
            }
        }

        // No match (or no annotations of that type at all): add exactly one new annotation.
        FeatureMap features = Factory.newFeatureMap();
        features.put(featureName, featureValue);
        try {
            outputAS.add(start, end, this.outputAnnotationType, features);
        }
        catch (Exception e) {
            throw new GateRuntimeException("Invalid Offsets", e);
        }
    }

    /**
     * Sets the location of the lexicon file.
     *
     * @param newLexiconURL reference to the lexicon
     */
    @Optional
    @CreoleParameter(comment="The URL to the lexicon file", defaultValue="resources/heptag/lexicon")
    public void setLexiconURL(ResourceReference newLexiconURL) {
        lexiconURL = newLexiconURL;
    }

    /**
     * Sets the location of the lexicon file from a plain URL by wrapping it
     * in a {@link ResourceReference}.
     *
     * @param newLexiconURL URL of the lexicon
     * @throws RuntimeException if the URL cannot be converted
     * @deprecated use {@link #setLexiconURL(ResourceReference)} instead
     */
    @Deprecated
    public void setLexiconURL(URL newLexiconURL) {
        ResourceReference reference;
        try {
            reference = new ResourceReference(newLexiconURL);
        }
        catch (URISyntaxException e) {
            throw new RuntimeException("Error converting URL to ResourceReference", e);
        }
        this.setLexiconURL(reference);
    }

    /**
     * @return the reference to the lexicon file
     */
    public ResourceReference getLexiconURL() {
        return lexiconURL;
    }

    /**
     * Sets the location of the ruleset file.
     *
     * @param newRulesURL reference to the ruleset
     */
    @Optional
    @CreoleParameter(comment="The URL to the ruleset file", defaultValue="resources/heptag/ruleset")
    public void setRulesURL(ResourceReference newRulesURL) {
        rulesURL = newRulesURL;
    }

    /**
     * Sets the location of the ruleset file from a plain URL by wrapping it
     * in a {@link ResourceReference}.
     *
     * @param newRulesURL URL of the ruleset
     * @throws RuntimeException if the URL cannot be converted
     * @deprecated use {@link #setRulesURL(ResourceReference)} instead
     */
    @Deprecated
    public void setRulesURL(URL newRulesURL) {
        ResourceReference reference;
        try {
            reference = new ResourceReference(newRulesURL);
        }
        catch (URISyntaxException e) {
            throw new RuntimeException("Error converting URL to ResourceReference", e);
        }
        this.setRulesURL(reference);
    }

    /**
     * Sets the encoding used for reading rules and lexicons.
     *
     * @param encoding the encoding name
     */
    @Optional
    @CreoleParameter(comment="The encoding used for reading rules and lexicons")
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * @return the reference to the ruleset file
     */
    public ResourceReference getRulesURL() {
        return rulesURL;
    }

    /**
     * Sets the name of the annotation set that holds the input token and
     * sentence annotations.
     *
     * @param newInputASName the input annotation set name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="The annotation set to be used as input that must contain 'Token' and 'Sentence' annotations")
    public void setInputASName(String newInputASName) {
        inputASName = newInputASName;
    }

    /**
     * @return the name of the input annotation set
     */
    public String getInputASName() {
        return inputASName;
    }

    /**
     * @return the encoding used for reading rules and lexicons
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * @return the name of the base token annotation type
     */
    public String getBaseTokenAnnotationType() {
        return baseTokenAnnotationType;
    }

    /**
     * @return the name of the base sentence annotation type
     */
    public String getBaseSentenceAnnotationType() {
        return baseSentenceAnnotationType;
    }

    /**
     * @return the name of the annotation type that receives the new features
     */
    public String getOutputAnnotationType() {
        return outputAnnotationType;
    }

    /**
     * Sets the name of the base token annotation type.
     *
     * @param baseTokenAnnotationType the token annotation type name
     */
    @RunTime
    @CreoleParameter(comment="The name of the base 'Token' annotation type", defaultValue="Token")
    public void setBaseTokenAnnotationType(String baseTokenAnnotationType) {
        this.baseTokenAnnotationType = baseTokenAnnotationType;
    }

    /**
     * Sets the name of the base sentence annotation type.
     *
     * @param baseSentenceAnnotationtype the sentence annotation type name
     */
    @RunTime
    @CreoleParameter(comment="The name of the base 'Sentence' annotation type", defaultValue="Sentence")
    public void setBaseSentenceAnnotationType(String baseSentenceAnnotationtype) {
        baseSentenceAnnotationType = baseSentenceAnnotationtype;
    }

    /**
     * Sets the name of the annotation type where the new features should be added.
     *
     * @param outputAnnotationType the output annotation type name
     */
    @RunTime
    @CreoleParameter(comment="The name of the annotation type where the new features should be added", defaultValue="Token")
    public void setOutputAnnotationType(String outputAnnotationType) {
        this.outputAnnotationType = outputAnnotationType;
    }

    /**
     * @return the name of the output annotation set
     */
    public String getOutputASName() {
        return outputASName;
    }

    /**
     * Sets the name of the annotation set used as output for POS annotations.
     *
     * @param outputASName the output annotation set name
     */
    @RunTime
    @Optional
    @CreoleParameter(comment="The annotation set to be used as output for POS annotations")
    public void setOutputASName(String outputASName) {
        this.outputASName = outputASName;
    }

    /**
     * Sets the character used to separate a lexicon entry from its list of POS tags.
     *
     * @param separator the separator string
     */
    @CreoleParameter(comment="Character used to separate lexicon entry from list of POS tags", defaultValue=" ")
    public void setLexiconSeparator(String separator) {
        this.separator = separator;
    }

    /**
     * @return the separator between a lexicon entry and its list of POS tags
     */
    public String getLexiconSeparator() {
        return separator;
    }
}

