/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.bagofwords.vectorizer;

import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.actor.Props;
import akka.routing.RoundRobinPool;
import java.io.File;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.deeplearning4j.bagofwords.vectorizer.DefaultInputStreamCreator;
import org.deeplearning4j.bagofwords.vectorizer.TextVectorizer;
import org.deeplearning4j.models.word2vec.StreamWork;
import org.deeplearning4j.models.word2vec.VocabWork;
import org.deeplearning4j.models.word2vec.actor.VocabActor;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.documentiterator.DocumentIterator;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.invertedindex.LuceneInvertedIndex;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Common base for {@link TextVectorizer} implementations.
 *
 * <p>Holds the shared configuration (vocabulary cache, tokenizer factory, stop words,
 * iterators, inverted index, ...) and implements {@link #fit()}, which streams documents
 * and sentences to a round-robin pool of {@link VocabActor} workers and then finalizes
 * the inverted index.
 *
 * <p>The actor system is held in a static field and is created and torn down by each
 * {@link #fit()} call; concurrent {@code fit()} calls on different instances therefore
 * share (and may shut down) the same system.
 */
public abstract class BaseTextVectorizer
implements TextVectorizer {
    private static final Logger log = LoggerFactory.getLogger(BaseTextVectorizer.class);

    /** A progress line is logged (and the producer briefly pauses) every this many queued items. */
    private static final int PROGRESS_INTERVAL = 10000;
    /** Length of the producer pause taken at every progress interval, in milliseconds. */
    private static final long THROTTLE_MILLIS = 1L;
    /** Interval between checks of the completion counter while waiting for workers, in milliseconds. */
    private static final long POLL_MILLIS = 10000L;

    protected transient VocabCache cache;
    /** Actor system used by {@link #fit()}; created on demand and cleared once it has been shut down. */
    protected static ActorSystem trainingSystem;
    protected transient TokenizerFactory tokenizerFactory;
    protected List<String> stopWords;
    private int layerSize = 0;
    protected int minWordFrequency = 5;
    protected transient DocumentIterator docIter;
    protected List<String> labels;
    protected transient SentenceIterator sentenceIterator;
    protected AtomicLong numWordsEncountered = new AtomicLong(0L);
    protected InvertedIndex index;
    protected int batchSize = 1000;
    protected double sample = 0.0;
    protected boolean stem = false;

    /**
     * Creates a vectorizer with default settings. No inverted index is created here,
     * so {@link #index} stays {@code null} unless a subclass assigns it.
     */
    public BaseTextVectorizer() {
    }

    /**
     * Creates a fully configured vectorizer.
     *
     * @param cache            vocabulary cache handed to the workers and to the default index
     * @param tokenizerFactory tokenizer factory handed to the workers
     * @param stopWords        stop words handed to the workers
     * @param layerSize        layer size; only stored by this class
     * @param minWordFrequency minimum word frequency handed to the workers
     * @param docIter          document source for {@link #fit()}, may be {@code null}
     * @param sentenceIterator sentence source for {@link #fit()}, may be {@code null}
     * @param labels           labels; only stored by this class
     * @param index            inverted index to use; when {@code null} a {@link LuceneInvertedIndex}
     *                         rooted at the relative directory {@code word2vec-index} is built
     * @param batchSize        batch size, also passed to the default index
     * @param sample           sampling value, also passed to the default index
     * @param stem             stemming flag attached to every sentence work item
     */
    protected BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index, int batchSize, double sample, boolean stem) {
        this.cache = cache;
        this.tokenizerFactory = tokenizerFactory;
        this.stopWords = stopWords;
        this.layerSize = layerSize;
        this.minWordFrequency = minWordFrequency;
        this.docIter = docIter;
        this.sentenceIterator = sentenceIterator;
        this.labels = labels;
        this.index = index;
        this.batchSize = batchSize;
        this.sample = sample;
        this.stem = stem;
        if (index == null) {
            this.index = new LuceneInvertedIndex.Builder().batchSize(batchSize).indexDir(new File("word2vec-index")).sample(sample).cache(cache).build();
        }
    }

    /** @return the configured batch size */
    @Override
    public int batchSize() {
        return this.batchSize;
    }

    /** @return the configured sampling value */
    @Override
    public double sample() {
        return this.sample;
    }

    /**
     * Sends every document and sentence to a pool of {@link VocabActor} workers, waits until
     * the shared completion counter has caught up with the number of queued items, then calls
     * {@link InvertedIndex#finish()}.
     *
     * <p>The actor system is always shut down before this method returns, including when it
     * fails, and the static reference is cleared so that a later {@code fit()} call creates a
     * fresh system instead of reusing a terminated one.
     *
     * <p>Interruption does not cancel the run: an interrupt received while pausing or waiting
     * is remembered and the thread's interrupt status is restored on exit. Restoring it
     * immediately would make every subsequent sleep throw at once and turn the wait loop
     * into a busy spin.
     */
    @Override
    public void fit() {
        if (trainingSystem == null) {
            trainingSystem = ActorSystem.create();
        }
        // Work on a local reference so that the shutdown below targets the system this call used.
        ActorSystem system = trainingSystem;
        AtomicLong semaphore = new AtomicLong(System.currentTimeMillis());
        // Completion counter shared with the workers through the work items.
        // NOTE(review): presumably advanced by VocabActor once per processed item - confirm there.
        AtomicInteger latch = new AtomicInteger(0);
        boolean interrupted = false;
        try {
            ActorRef vocabActor = system.actorOf(new RoundRobinPool(Runtime.getRuntime().availableProcessors()).props(Props.create(VocabActor.class, new Object[]{this.tokenizerFactory, this.cache, this.stopWords, semaphore, this.minWordFrequency, this.numWordsEncountered, this.index})));
            int queued = 0;

            while (this.docIter != null && this.docIter.hasNext()) {
                vocabActor.tell(new StreamWork(new DefaultInputStreamCreator(this.docIter), latch), vocabActor);
                ++queued;
                interrupted |= reportProgress(queued);
            }

            String sentence;
            while (this.getSentenceIterator() != null && this.getSentenceIterator().hasNext() && (sentence = this.getSentenceIterator().nextSentence()) != null) {
                vocabActor.tell(new VocabWork(latch, sentence, this.stem), vocabActor);
                ++queued;
                interrupted |= reportProgress(queued);
            }

            interrupted |= awaitCompletion(latch, queued);

            log.info("Invoking finish on index");
            this.index.finish();
        } finally {
            system.shutdown();
            if (trainingSystem == system) {
                // A terminated system cannot create actors, so it must not be reused by the next fit().
                trainingSystem = null;
            }
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Logs the number of queued items and pauses briefly at every progress interval.
     *
     * @param queued number of items queued so far
     * @return {@code true} if the pause was interrupted
     */
    private static boolean reportProgress(int queued) {
        if (queued % PROGRESS_INTERVAL != 0) {
            return false;
        }
        log.info("Sent {}", queued);
        return pause(THROTTLE_MILLIS);
    }

    /**
     * Polls the completion counter until it reaches the number of queued items.
     *
     * @param latch  completion counter shared with the workers
     * @param queued total number of items that were queued
     * @return {@code true} if any of the waits was interrupted
     */
    private static boolean awaitCompletion(AtomicInteger latch, int queued) {
        boolean interrupted = false;
        while (latch.get() < queued) {
            if (pause(POLL_MILLIS)) {
                interrupted = true;
            } else {
                log.info("latch count {} with queued {}", latch.get(), queued);
            }
        }
        return interrupted;
    }

    /**
     * Sleeps for the given time without propagating interruption.
     *
     * @param millis time to sleep, in milliseconds
     * @return {@code true} if the sleep was cut short by an interrupt; the caller is
     *         responsible for restoring the thread's interrupt status later
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return false;
        }
        catch (InterruptedException e) {
            // Reported through the return value instead of being re-flagged here; see fit().
            return true;
        }
    }

    /** @return the vocabulary cache */
    @Override
    public VocabCache vocab() {
        return this.cache;
    }

    /** @return the sentence source consumed by {@link #fit()}, possibly {@code null} */
    public SentenceIterator getSentenceIterator() {
        return this.sentenceIterator;
    }

    /** @param sentenceIterator the sentence source to consume in {@link #fit()} */
    public void setSentenceIterator(SentenceIterator sentenceIterator) {
        this.sentenceIterator = sentenceIterator;
    }

    /** @return the document source consumed by {@link #fit()}, possibly {@code null} */
    public DocumentIterator getDocIter() {
        return this.docIter;
    }

    /** @param docIter the document source to consume in {@link #fit()} */
    public void setDocIter(DocumentIterator docIter) {
        this.docIter = docIter;
    }

    /** @return the minimum word frequency handed to the workers */
    public int getMinWordFrequency() {
        return this.minWordFrequency;
    }

    /** @param minWordFrequency the minimum word frequency to hand to the workers */
    public void setMinWordFrequency(int minWordFrequency) {
        this.minWordFrequency = minWordFrequency;
    }

    /** @return the stored layer size */
    public int getLayerSize() {
        return this.layerSize;
    }

    /** @param layerSize the layer size to store */
    public void setLayerSize(int layerSize) {
        this.layerSize = layerSize;
    }

    /** @return the stop words handed to the workers */
    public List<String> getStopWords() {
        return this.stopWords;
    }

    /** @param stopWords the stop words to hand to the workers */
    public void setStopWords(List<String> stopWords) {
        this.stopWords = stopWords;
    }

    /** @return the tokenizer factory handed to the workers */
    public TokenizerFactory getTokenizerFactory() {
        return this.tokenizerFactory;
    }

    /** @param tokenizerFactory the tokenizer factory to hand to the workers */
    public void setTokenizerFactory(TokenizerFactory tokenizerFactory) {
        this.tokenizerFactory = tokenizerFactory;
    }

    /** @return the vocabulary cache (same object as {@link #vocab()}) */
    public VocabCache getCache() {
        return this.cache;
    }

    /** @param cache the vocabulary cache to use */
    public void setCache(VocabCache cache) {
        this.cache = cache;
    }

    /** @return the current value of the word counter shared with the workers */
    @Override
    public long numWordsEncountered() {
        return this.numWordsEncountered.get();
    }

    /** @return the inverted index, or {@code null} if none was configured */
    @Override
    public InvertedIndex index() {
        return this.index;
    }
}

