/*
 * Decompiled with CFR 0.152.
 */
package org.openimaj.text.nlp.sentiment.lexicon;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.arabidopsis.ahocorasick.AhoCorasick;
import org.arabidopsis.ahocorasick.SearchResult;
import org.openimaj.text.nlp.textpipe.annotations.AnnotationUtils;
import org.openimaj.text.nlp.textpipe.annotations.RawTextAnnotation;
import org.openimaj.text.nlp.textpipe.annotations.TokenAnnotation;
import org.openimaj.text.nlp.textpipe.annotators.MissingRequiredAnnotationException;
import org.openimaj.text.nlp.textpipe.annotators.OpenNLPTokenAnnotator;

/**
 * Grows a positive and a negative word lexicon from a pair of seed lists by
 * scanning a corpus for conjunction patterns.
 *
 * <p>For every word already in a lexicon, the corpus is searched for
 * "{@code word and}" and "{@code word but}". The token that follows "and" is
 * added to the same lexicon as {@code word}; the token that follows "but" is
 * added to the opposite lexicon. A leading "not" after the conjunction flips
 * the decision and the token after "not" is used instead. Newly added words
 * are queued and searched for in turn until no new word is discovered.
 *
 * <p>Matching is plain substring matching on the lower-cased document; no
 * word-boundary check is performed on the left side of a pattern.
 */
public class HMLexiconBuilder {
    /** Suffix appended to a lexicon word to find a same-polarity neighbour. */
    private static final String SAME_POLARITY_SUFFIX = " and";
    /** Suffix appended to a lexicon word to find an opposite-polarity neighbour. */
    private static final String OPPOSITE_POLARITY_SUFFIX = " but";
    /** Token that inverts the polarity decision when it directly follows the conjunction. */
    private static final String NEGATION = "not";

    /** All words currently classified as positive. */
    Set<String> positiveLexicon = new HashSet<String>();
    /** All words currently classified as negative. */
    Set<String> negativeLexicon = new HashSet<String>();
    /** Positive words that have been added but not yet searched for in the corpus. */
    List<String> newPos = new LinkedList<String>();
    /** Negative words that have been added but not yet searched for in the corpus. */
    List<String> newNeg = new LinkedList<String>();
    /** Documents supplied to the most recent {@link #buildFromCorpus(List)} call. */
    List<String> corpus;
    /** Tokeniser applied to the text that follows a matched pattern. */
    OpenNLPTokenAnnotator tokA = new OpenNLPTokenAnnotator();

    /**
     * Creates a builder seeded with the given words. Each seed is added to its
     * lexicon and queued for corpus search; duplicates are queued only once.
     *
     * @param posBootStrap seed words for the positive lexicon
     * @param negBootStrap seed words for the negative lexicon
     */
    public HMLexiconBuilder(List<String> posBootStrap, List<String> negBootStrap) {
        for (String s : posBootStrap) {
            this.addToLexicon(this.positiveLexicon, this.newPos, s);
        }
        for (String s : negBootStrap) {
            this.addToLexicon(this.negativeLexicon, this.newNeg, s);
        }
    }

    /**
     * Adds {@code token} to {@code compSet} and, only if it was not already
     * present, appends it to the pending queue {@code q}.
     */
    private void addToLexicon(Set<String> compSet, List<String> q, String token) {
        if (compSet.add(token)) {
            q.add(token);
        }
    }

    /**
     * Expands both lexicons using the given documents until no pending words
     * remain.
     *
     * @param corpus documents to scan; {@code null} entries are skipped
     */
    public void buildFromCorpus(List<String> corpus) {
        this.corpus = corpus;
        this.process();
    }

    /**
     * Drains the positive queue, then the negative queue, and repeats while
     * either pass has queued new words for the other side.
     */
    private void process() {
        // Iterative rather than recursive so a long chain of discoveries
        // cannot grow the call stack.
        while (!this.newPos.isEmpty() || !this.newNeg.isEmpty()) {
            while (!this.newPos.isEmpty()) {
                this.processNewLexTokens(this.positiveLexicon, this.newPos, this.negativeLexicon, this.newNeg);
            }
            while (!this.newNeg.isEmpty()) {
                this.processNewLexTokens(this.negativeLexicon, this.newNeg, this.positiveLexicon, this.newPos);
            }
        }
    }

    /**
     * Searches the corpus for the conjunction patterns of every word in
     * {@code q}, empties {@code q}, and files each discovered neighbour into
     * {@code lexicon}/{@code q} or {@code anti_lexicon}/{@code anti_q}.
     *
     * @param lexicon      lexicon the queued words belong to
     * @param q            pending words of {@code lexicon}; cleared, then refilled with new finds
     * @param anti_lexicon lexicon of the opposite polarity
     * @param anti_q       pending queue of {@code anti_lexicon}
     */
    private void processNewLexTokens(Set<String> lexicon, List<String> q, Set<String> anti_lexicon, List<String> anti_q) {
        AhoCorasick tri = new AhoCorasick();
        for (String seed : q) {
            String syno = seed + SAME_POLARITY_SUFFIX;
            String anti = seed + OPPOSITE_POLARITY_SUFFIX;
            tri.add(syno.getBytes(), (Object) syno);
            tri.add(anti.getBytes(), (Object) anti);
        }
        tri.prepare();
        q.clear();
        for (String doc : this.corpus) {
            if (doc == null) {
                continue;
            }
            String lcdoc = doc.toLowerCase();
            for (String pattern : this.matchedPatterns(tri, lcdoc)) {
                boolean opposite = pattern.endsWith(OPPOSITE_POLARITY_SUFFIX);
                // Visit every occurrence of the pattern, not just the first
                // one, so each match contributes its own following word.
                for (int at = lcdoc.indexOf(pattern); at >= 0; at = lcdoc.indexOf(pattern, at + 1)) {
                    String following = lcdoc.substring(at + pattern.length());
                    this.harvest(following, opposite, lexicon, q, anti_lexicon, anti_q);
                }
            }
        }
    }

    /**
     * Returns the distinct patterns the automaton reports for {@code lcdoc},
     * in the order they are first reported.
     */
    private List<String> matchedPatterns(AhoCorasick tri, String lcdoc) {
        List<String> patterns = new ArrayList<String>();
        Set<String> seen = new HashSet<String>();
        Iterator<?> results = tri.search(lcdoc.getBytes());
        while (results.hasNext()) {
            SearchResult sr = (SearchResult) results.next();
            // Outputs are the String objects registered via tri.add above.
            for (Object output : sr.getOutputs()) {
                if (output instanceof String && seen.add((String) output)) {
                    patterns.add((String) output);
                }
            }
        }
        return patterns;
    }

    /**
     * Picks the candidate word from the text that follows a matched pattern
     * and adds it to the appropriate lexicon.
     *
     * @param following text immediately after the matched pattern
     * @param opposite  {@code true} when the pattern was a "but" pattern
     */
    private void harvest(String following, boolean opposite, Set<String> lexicon, List<String> q,
            Set<String> anti_lexicon, List<String> anti_q) {
        Iterator<String> it = this.tokenise(following).iterator();
        if (!it.hasNext()) {
            // Nothing follows the conjunction, so there is no candidate word.
            return;
        }
        String candidate = it.next();
        boolean anti = opposite;
        if (candidate.equals(NEGATION)) {
            // "x and not y" / "x but not y": flip polarity and take the word after "not".
            anti = !anti;
            if (!it.hasNext()) {
                return;
            }
            candidate = it.next();
        }
        if (anti) {
            this.addToLexicon(anti_lexicon, anti_q, candidate);
        } else {
            this.addToLexicon(lexicon, q, candidate);
        }
    }

    /**
     * Tokenises {@code text} with {@link #tokA}.
     *
     * @return the token strings, or an empty list if the annotator fails;
     *         never {@code null}
     */
    private List<String> tokenise(String text) {
        RawTextAnnotation rta = new RawTextAnnotation(text);
        try {
            this.tokA.annotate(rta);
            List<String> tokens = AnnotationUtils.getStringTokensFromTokenAnnotationList(rta.getAnnotationsFor(TokenAnnotation.class));
            if (tokens != null) {
                return tokens;
            }
        }
        catch (MissingRequiredAnnotationException e) {
            // Report the failure but keep going: one untokenisable fragment
            // should not abort the whole corpus scan.
            e.printStackTrace();
        }
        return new ArrayList<String>();
    }
}

