/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.word2vec.actor;

import akka.actor.UntypedActor;
import java.io.Closeable;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.compress.utils.IOUtils;
import org.deeplearning4j.models.word2vec.StreamWork;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.VocabWork;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.movingwindow.Util;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;

/**
 * Actor that tokenizes incoming text and records the resulting tokens in a
 * {@link VocabCache} and an {@link InvertedIndex}.
 *
 * <p>Two message types are handled: {@link VocabWork} (text obtained from
 * {@code getWork()}) and {@link StreamWork} (text read from the stream returned by
 * {@code getIs()}). Any other message is passed to {@code unhandled(Object)}.
 *
 * <p>The cache, index and both atomic counters are supplied by the caller through the
 * constructor; this class only updates them.
 */
public class VocabActor
extends UntypedActor {
    private final transient TokenizerFactory tokenizer;
    private final List<String> stopWords;
    private final AtomicLong lastUpdate;
    private final VocabCache cache;
    private final int minWordFrequency;
    private final AtomicInteger numWordsEncountered;
    private final InvertedIndex index;

    /**
     * Creates the actor around the collaborators it updates.
     *
     * @param tokenizer           factory used to create a tokenizer per message
     * @param cache               vocabulary cache that receives word/document counts and tokens
     * @param stopWords           words that are replaced by the literal token {@code "STOP"}
     * @param lastUpdate          set to {@code System.currentTimeMillis()} after each processed message
     * @param minWordFrequency    minimum word frequency a token needs before it is put into the vocabulary
     * @param numWordsEncountered counter increased by the number of tokens in each processed document
     * @param index               inverted index that receives each processed document
     */
    public VocabActor(TokenizerFactory tokenizer, VocabCache cache, List<String> stopWords, AtomicLong lastUpdate, int minWordFrequency, AtomicInteger numWordsEncountered, InvertedIndex index) {
        this.tokenizer = tokenizer;
        this.stopWords = stopWords;
        this.lastUpdate = lastUpdate;
        this.cache = cache;
        this.minWordFrequency = minWordFrequency;
        this.numWordsEncountered = numWordsEncountered;
        this.index = index;
    }

    /**
     * Dispatches a message to the matching handler, or to {@code unhandled(Object)} when
     * it is neither a {@link VocabWork} nor a {@link StreamWork}.
     *
     * @param message the received message
     * @throws Exception if processing the message fails
     */
    public void onReceive(Object message) throws Exception {
        if (message instanceof VocabWork) {
            this.handleVocabWork((VocabWork)message);
        } else if (message instanceof StreamWork) {
            this.handleStreamWork((StreamWork)message);
        } else {
            this.unhandled(message);
        }
    }

    /** Tokenizes the sentence carried by {@code work} and records it as one document. */
    private void handleVocabWork(VocabWork work) throws Exception {
        String sentence = work.getWork();
        if (sentence == null || sentence.isEmpty()) {
            // NOTE(review): this path returns without work.countDown(), unlike the
            // short-sentence path below. Kept as-is; confirm whether callers rely on it.
            return;
        }
        if (sentence.length() <= 2) {
            // Sentences of at most two characters are skipped but still counted down.
            work.countDown();
            return;
        }
        Set<String> encountered = new HashSet<String>();
        List<VocabWord> document = new ArrayList<VocabWord>();
        Tokenizer t = this.tokenizer.create(sentence);
        while (t.hasMoreTokens()) {
            this.processToken(t.nextToken(), encountered, document);
        }
        this.recordDocument(document);
        work.countDown();
        this.lastUpdate.set(System.currentTimeMillis());
    }

    /**
     * Tokenizes the stream carried by {@code work}, records it as one document and closes
     * the stream. Tokenizing stops at the first {@code null} or empty token.
     */
    private void handleStreamWork(StreamWork work) throws Exception {
        InputStream is = work.getIs();
        if (is == null || !this.hasAvailableBytes(is)) {
            // NOTE(review): this path neither counts down nor closes the stream, matching
            // the previous behaviour; confirm whether that is intended.
            return;
        }
        try {
            Set<String> encountered = new HashSet<String>();
            List<VocabWord> document = new ArrayList<VocabWord>();
            Tokenizer t = this.tokenizer.create(is);
            while (t.hasMoreTokens()) {
                String token = t.nextToken();
                if (token == null || token.isEmpty()) {
                    break;
                }
                this.processToken(token, encountered, document);
            }
            this.recordDocument(document);
        }
        finally {
            // Close even when tokenizing or indexing throws, so the stream is not leaked.
            IOUtils.closeQuietly((Closeable)is);
        }
        work.countDown();
        this.lastUpdate.set(System.currentTimeMillis());
    }

    /** Returns {@code true} only if {@code is.available()} succeeds and reports more than zero bytes. */
    private boolean hasAvailableBytes(InputStream is) {
        try {
            return is.available() > 0;
        }
        catch (Exception ignored) {
            // Best effort: a stream that cannot report availability is treated as unreadable.
            return false;
        }
    }

    /** Adds {@code document} to the index under the next document id and bumps the word counter. */
    private void recordDocument(List<VocabWord> document) {
        this.index.addWordsToDoc(this.index.numDocuments(), document);
        // addAndGet keeps the update atomic; set(get() + n) could lose concurrent increments.
        this.numWordsEncountered.addAndGet(document.size());
    }

    /**
     * Records a single token in the cache and appends its {@link VocabWord} to {@code words}.
     *
     * <p>A token contained in the stop-word list is replaced by {@code "STOP"} first. The
     * word count is always incremented; the document count is incremented only the first
     * time the token is seen in {@code encountered}. A token that does not match any stop
     * word, is not yet a vocabulary word and has reached the minimum frequency is assigned
     * the next index and put into the vocabulary.
     *
     * @param token       the raw token
     * @param encountered tokens already seen in the current document; updated by this call
     * @param words       the current document; the token's {@link VocabWord} is appended
     */
    protected void processToken(String token, Set<String> encountered, List<VocabWord> words) {
        if (this.stopWords.contains(token)) {
            token = "STOP";
        }
        this.cache.incrementWordCount(token);
        if (!encountered.contains(token)) {
            this.cache.incrementDocCount(token, 1);
            encountered.add(token);
        }
        VocabWord word;
        if (this.cache.hasToken(token)) {
            word = this.cache.tokenFor(token);
        } else {
            word = new VocabWord(1.0, token);
            this.cache.addToken(word);
        }
        words.add(word);
        boolean eligible = !Util.matchesAnyStopWord(this.stopWords, token) && token != null && !token.isEmpty();
        // The former stop-word "else if" branch was nested inside this non-stop-word check
        // and could never run, so it has been removed.
        if (eligible && !this.cache.containsWord(token) && this.cache.wordFrequency(token) >= this.minWordFrequency) {
            word.setIndex(this.cache.numWords());
            this.cache.putVocabWord(token);
        }
    }
}

