/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.word2vec;

import com.google.common.base.Function;
import com.google.common.util.concurrent.AtomicDouble;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.math3.random.RandomGenerator;
import org.deeplearning4j.bagofwords.vectorizer.TextVectorizer;
import org.deeplearning4j.bagofwords.vectorizer.TfidfVectorizer;
import org.deeplearning4j.berkeley.Counter;
import org.deeplearning4j.models.word2vec.Huffman;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.deeplearning4j.nn.api.Persistable;
import org.deeplearning4j.text.documentiterator.DocumentIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.stopwords.StopWords;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.UimaTokenizerFactory;
import org.deeplearning4j.util.MathUtils;
import org.deeplearning4j.util.SetUtils;
import org.eclipse.jetty.util.ConcurrentHashSet;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Word2Vec
implements Persistable {
    /** Serialization version identifier. */
    protected static final long serialVersionUID = -2367495638286018038L;
    /** Tokenizer factory handed to the vectorizer created in buildVocab(); transient, so not serialized. */
    protected transient TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
    /** Optional sentence source; reset at the start of fit() and passed to the vectorizer. Transient. */
    protected transient SentenceIterator sentenceIter;
    /** Optional document source; reset at the start of fit() and passed to the vectorizer. Transient. */
    protected transient DocumentIterator docIter;
    /** Vocabulary / vector store that all lookups and training updates go through. Transient. */
    protected transient VocabCache cache;
    /** Batch size forwarded to the vectorizer builder in buildVocab(). */
    protected int batchSize = 1000;
    /** Only copied by load(); not otherwise read in this file. */
    protected int topNSize = 40;
    /** Sub-sampling parameter read by addWords(); values <= 0 disable sub-sampling. */
    protected double sample = 0.0;
    /** Set in fit() to vectorizer.numWordsEncountered() * numIterations; denominator of the learning-rate decay. */
    protected long totalWords = 1L;
    /** Not referenced by any method visible in this file. */
    protected AtomicInteger rateOfChange = new AtomicInteger(0);
    /** Starting learning rate; fit() decays it linearly with the number of words processed. */
    protected AtomicDouble alpha = new AtomicDouble(0.025);
    /** Minimum word count forwarded to the vectorizer builder (minWords). */
    protected int minWordFrequency = 5;
    /** Context window size used by trainSentence() and skipGram(). */
    protected int window = 5;
    /** Length of each word vector; setCache() may overwrite it from the cache's syn0 column count. */
    protected int layerSize = 50;
    /** Not referenced by any method visible in this file. Transient. */
    protected transient RandomGenerator g;
    protected static Logger log = LoggerFactory.getLogger(Word2Vec.class);
    /** Stop words forwarded to the vectorizer; lazily filled by readStopWords(). */
    protected List<String> stopWords;
    /** When true, setup() calls resetWeights(). Cleared by setSentenceIter(), set by resetWeightsOnSetup(). */
    protected boolean shouldReset = true;
    /** Number of passes; fit() enqueues every document this many times. */
    protected int numIterations = 1;
    /** Vocabulary key whose vector is returned for words that are not in the cache. */
    public static final String UNK = "UNK";
    /** Copied from the builder; not otherwise read in this file. */
    protected long seed = 123L;
    /** When true and the vocabulary was freshly built, fit() asks the cache to save it. */
    protected boolean saveVocab = false;
    /** Lower bound for the decayed learning rate computed in fit(). */
    protected double minLearningRate = 0.01;
    /** Vectorizer that builds the vocabulary and exposes the document index iterated by fit(). */
    protected TextVectorizer vectorizer;
    /** Not read by any method visible in this file. */
    protected int learningRateDecayWords = 10000;
    /** Copied from the builder; not otherwise read in this file (the builder also passes it to the cache). */
    protected boolean useAdaGrad = false;
    /** Number of training threads started by fit(). */
    protected int workers = Runtime.getRuntime().availableProcessors();
    /** Hand-off queue between the document callback and the training threads in fit(); capacity 10000 jobs. */
    protected Queue<List<List<VocabWord>>> jobQueue = new LinkedBlockingDeque<List<List<VocabWord>>>(10000);

    /**
     * Scores a list of analogy questions, grouped into sections.
     *
     * <p>A line starting with {@code ":"} opens a new section; every other line is a
     * space-separated question with at least four tokens. For each question the nearest
     * word to (token 0 as positive, tokens 1 and 2 as negative) is compared with token 3.
     * NOTE(review): the conventional analogy layout uses tokens 1 and 2 as positives and
     * token 0 as the negative; the token roles here are kept as they were - confirm
     * against the question file format in use.
     *
     * <p>Questions that appear before the first section header cannot be attributed to a
     * section and are ignored, as are lines with fewer than four tokens.
     *
     * @param questions section headers and question lines, in file order
     * @return map from section header line to the percentage (0-100) of questions in that
     *         section answered correctly; a section without questions maps to 0.0
     */
    public Map<String, Double> accuracy(List<String> questions) {
        Map<String, Double> accuracy = new HashMap<String, Double>();
        String section = null;
        int right = 0;
        int wrong = 0;
        for (String s : questions) {
            if (s.startsWith(":")) {
                // Close the previous section under ITS OWN header before opening the next.
                if (section != null) {
                    accuracy.put(section, Word2Vec.percentCorrect(right, wrong));
                }
                section = s;
                right = 0;
                wrong = 0;
                continue;
            }
            if (section == null) {
                continue;
            }
            String[] split = s.split(" ");
            if (split.length < 4) {
                // Malformed question line: nothing to evaluate.
                continue;
            }
            List<String> positive = Arrays.asList(split[0]);
            List<String> negative = Arrays.asList(split[1], split[2]);
            String predicted = split[3];
            Collection<String> nearest = this.wordsNearest(positive, negative, 1);
            if (!nearest.isEmpty() && predicted.equals(nearest.iterator().next())) {
                ++right;
            } else {
                ++wrong;
            }
        }
        // The last section has no following header to trigger its flush.
        if (section != null) {
            accuracy.put(section, Word2Vec.percentCorrect(right, wrong));
        }
        return accuracy;
    }

    /** Returns 100 * right / (right + wrong), or 0.0 when nothing was counted. */
    private static double percentCorrect(int right, int wrong) {
        int total = right + wrong;
        return total == 0 ? 0.0 : 100.0 * (double)right / (double)total;
    }

    /**
     * Collects every vocabulary word whose string similarity to {@code word}
     * (as computed by {@code MathUtils.stringSimilarity}) is at least {@code accuracy}.
     *
     * @param word     the word to compare against
     * @param accuracy minimum similarity score for a vocabulary word to be included
     * @return matching vocabulary words, in the cache's iteration order
     */
    public List<String> similarWordsInVocabTo(String word, double accuracy) {
        ArrayList<String> matches = new ArrayList<String>();
        for (String candidate : this.cache.words()) {
            double score = MathUtils.stringSimilarity(new String[]{word, candidate});
            if (score >= accuracy) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /**
     * Looks up the vocabulary index of a word by delegating to the cache.
     *
     * @param word the word to look up
     * @return whatever the cache reports for this word (other methods in this class
     *         treat a negative value as "not in vocabulary")
     */
    public int indexOf(String word) {
        int index = this.cache.indexOf(word);
        return index;
    }

    /**
     * Returns the vector of a word as a flat double array.
     *
     * @param word the word to look up
     * @return the word's vector, or the vector stored under {@link #UNK} when the
     *         cache reports a negative index for the word
     */
    public double[] getWordVector(String word) {
        boolean known = this.cache.indexOf(word) >= 0;
        String key = known ? word : UNK;
        return this.cache.vector(key).ravel().data().asDouble();
    }

    /**
     * Returns the vector of a word as an {@code INDArray}.
     *
     * @param word the word to look up
     * @return the cache's vector for the word, or the vector stored under
     *         {@link #UNK} when the cache reports a negative index for the word
     */
    public INDArray getWordVectorMatrix(String word) {
        boolean known = this.cache.indexOf(word) >= 0;
        return this.cache.vector(known ? word : UNK);
    }

    /**
     * Returns the vector of a word divided by its 2-norm.
     *
     * <p>For a word the cache does not know (negative index) the {@link #UNK}
     * vector is returned as stored, i.e. without normalization.
     *
     * @param word the word to look up
     * @return a new array holding the normalized vector, or the raw UNK vector
     */
    public INDArray getWordVectorMatrixNormalized(String word) {
        if (this.cache.indexOf(word) >= 0) {
            INDArray raw = this.cache.vector(word);
            return raw.div((Number)Nd4j.getBlasWrapper().nrm2(raw));
        }
        return this.cache.vector(UNK);
    }

    /**
     * Finds the words nearest to {@code sum(positive) - sum(negative)}.
     *
     * @param positive words whose vectors are added to the query
     * @param negative words whose vectors are subtracted from the query
     * @param top      maximum number of words to return
     * @return up to {@code top} vocabulary words, never including any of the input
     *         words; empty when both input lists are empty
     */
    public Collection<String> wordsNearestSum(List<String> positive, List<String> negative, int top) {
        if (positive.isEmpty() && negative.isEmpty()) {
            return new ArrayList<String>();
        }
        INDArray query = Nd4j.create(this.layerSize);
        for (String s : positive) {
            query.addi(this.cache.vector(s));
        }
        for (String s : negative) {
            query.addi(this.cache.vector(s).mul(-1));
        }
        return this.nearestTo(query, Word2Vec.unionOf(positive, negative), top);
    }

    /**
     * Finds the words nearest to the unit vector of {@code word}.
     * Behaves exactly like {@link #wordsNearest(String, int)}.
     *
     * @param word the query word (the {@link #UNK} vector is used if it is unknown)
     * @param n    maximum number of words to return
     * @return up to {@code n} vocabulary words, never including {@code word} itself
     */
    public Collection<String> wordsNearestSum(String word, int n) {
        return this.nearestToWord(word, n);
    }

    /**
     * Finds the words nearest to the mean of the positive vectors and the negated
     * negative vectors.
     *
     * @param positive words whose vectors enter the mean as-is
     * @param negative words whose vectors enter the mean negated
     * @param top      maximum number of words to return
     * @return up to {@code top} vocabulary words, never including any of the input
     *         words; empty when both input lists are empty
     */
    public Collection<String> wordsNearest(List<String> positive, List<String> negative, int top) {
        if (positive.isEmpty() && negative.isEmpty()) {
            // A 0-row matrix has no meaningful mean.
            return new ArrayList<String>();
        }
        INDArray words = Nd4j.create(positive.size() + negative.size(), this.layerSize);
        int row = 0;
        for (String s : positive) {
            words.putRow(row++, this.cache.vector(s));
        }
        for (String s : negative) {
            words.putRow(row++, this.cache.vector(s).mul(-1));
        }
        return this.nearestTo(words.mean(0), Word2Vec.unionOf(positive, negative), top);
    }

    /**
     * Finds the words nearest to the unit vector of {@code word}.
     *
     * @param word the query word (the {@link #UNK} vector is used if it is unknown)
     * @param n    maximum number of words to return
     * @return up to {@code n} vocabulary words, never including {@code word} itself;
     *         empty when no vector is available for the query
     */
    public Collection<String> wordsNearest(String word, int n) {
        return this.nearestToWord(word, n);
    }

    /** Shared implementation of the single-word nearest-neighbour queries. */
    private Collection<String> nearestToWord(String word, int n) {
        INDArray raw = this.getWordVectorMatrix(word);
        if (raw == null) {
            return new ArrayList<String>();
        }
        INDArray vec = Transforms.unitVec(raw);
        if (vec == null) {
            return new ArrayList<String>();
        }
        // Exclusion is done by word text, so an out-of-vocabulary query (answered
        // with the UNK vector) no longer needs a VocabWord entry of its own.
        return this.nearestTo(vec, new HashSet<String>(Arrays.asList(word)), n);
    }

    /** Returns the distinct words of both lists. */
    private static Set<String> unionOf(List<String> first, List<String> second) {
        Set<String> all = new HashSet<String>(first);
        all.addAll(second);
        return all;
    }

    /**
     * Ranks the vocabulary against {@code query} and returns the best matches.
     *
     * <p>With an {@code InMemoryLookupCache} the ranking is computed on the whole
     * syn0 matrix at once and the result list is ordered best-first. Otherwise every
     * vocabulary word is scored by cosine similarity and the top keys of the counter
     * are returned (in the counter's key-set order).
     *
     * @param query    the query vector
     * @param excluded words that must not appear in the result
     * @param top      maximum number of words to return
     */
    private Collection<String> nearestTo(INDArray query, Set<String> excluded, int top) {
        if (this.cache instanceof InMemoryLookupCache) {
            INDArray syn0 = ((InMemoryLookupCache)this.cache).getSyn0();
            INDArray weights = syn0.norm2(0).rdivi(1).muli(query);
            INDArray distances = syn0.mulRowVector(weights).sum(1);
            // Descending sort; element 0 of the result holds the row indices.
            INDArray order = Nd4j.sortWithIndices(distances, 0, false)[0];
            List<String> ret = new ArrayList<String>();
            int available = order.length();
            // Walk the ranking until enough words are collected, never past its end.
            for (int i = 0; i < available && ret.size() < top; ++i) {
                String candidate = this.cache.wordAtIndex(order.getInt(new int[]{i}));
                if (excluded.contains(candidate)) {
                    continue;
                }
                ret.add(candidate);
            }
            return ret;
        }
        Counter<String> distances = new Counter<String>();
        for (String s : this.cache.words()) {
            if (excluded.contains(s)) {
                continue;
            }
            double sim = Transforms.cosineSim(query, this.getWordVectorMatrix(s));
            distances.incrementCount(s, sim);
        }
        distances.keepTopNKeys(top);
        return distances.keySet();
    }

    /**
     * Tells whether the cache knows a word.
     *
     * @param word the word to test
     * @return {@code true} when the cache reports a non-negative index for the word
     */
    public boolean hasWord(String word) {
        int index = this.cache.indexOf(word);
        return index >= 0;
    }

    /**
     * Builds (or loads) the vocabulary and trains the word vectors.
     *
     * <p>Flow, as written below:
     * <ol>
     *   <li>buildVocab(); if the vocabulary was freshly built and saveVocab is set, the cache saves it.</li>
     *   <li>Stop words are loaded if absent and both iterators (when present) are reset.</li>
     *   <li>{@code workers} consumer threads are started; each polls {@code jobQueue} and
     *       trains every sentence of each job with a linearly decayed learning rate.</li>
     *   <li>The vectorizer's index is walked with {@code eachDoc}; the callback filters each
     *       document through addWords() and groups the results into jobs for the queue.</li>
     *   <li>Leftover sentences are enqueued, the executor is shut down and awaited, and the
     *       consumer threads are joined.</li>
     * </ol>
     *
     * <p>NOTE(review): the anonymous Runnable below is in decompiler form (constructor
     * arguments and synthetic {@code val$} fields); it documents the captured variables
     * but is not source-level Java.
     *
     * @throws IOException declared by the signature; which call raises it is not visible in this file
     */
    public void fit() throws IOException {
        boolean loaded = this.buildVocab();
        if (!loaded && this.saveVocab) {
            this.cache.saveVocab();
        }
        if (this.stopWords == null) {
            this.readStopWords();
        }
        log.info("Training word2vec multithreaded");
        if (this.sentenceIter != null) {
            this.sentenceIter.reset();
        }
        if (this.docIter != null) {
            this.docIter.reset();
        }
        int[] docs = this.vectorizer.index().allDocs();
        // Total work for the learning-rate schedule: every word is seen numIterations times.
        this.totalWords = this.vectorizer.numWordsEncountered();
        this.totalWords *= (long)this.numIterations;
        log.info("Processing sentences...");
        ArrayList<Thread> work = new ArrayList<Thread>();
        final AtomicInteger processed = new AtomicInteger(0);
        final int allDocs = docs.length * this.numIterations;
        AtomicLong numWordsSoFar = new AtomicLong(0L);
        AtomicLong lastReport = new AtomicLong(0L);
        for (int i = 0; i < this.workers; ++i) {
            // Nothing is ever added to this set, so the contains(job) test in run() is always false.
            ConcurrentHashSet set = new ConcurrentHashSet();
            Thread t = new Thread(new Runnable((Set)set, numWordsSoFar, lastReport){
                final /* synthetic */ Set val$set;
                final /* synthetic */ AtomicLong val$numWordsSoFar;
                final /* synthetic */ AtomicLong val$lastReport;
                {
                    this.val$set = set;
                    this.val$numWordsSoFar = atomicLong;
                    this.val$lastReport = atomicLong2;
                }

                @Override
                public void run() {
                    // Per-thread random state for trainSentence().
                    AtomicLong nextRandom = new AtomicLong(5L);
                    long checked = 0L;
                    // The loop can only end on a poll count that is a positive multiple of 1000
                    // and once processed >= allDocs. poll() does not block, so an empty queue
                    // makes this a busy spin.
                    // NOTE(review): processed is incremented once per JOB below, while allDocs
                    // counts documents * iterations and a job carries many sentences; verify
                    // that processed can actually reach allDocs, otherwise join() never returns.
                    while (checked <= 0L || checked % 1000L != 0L || processed.get() < allDocs) {
                        ++checked;
                        List<List<VocabWord>> job = Word2Vec.this.jobQueue.poll();
                        if (job == null || job.isEmpty() || this.val$set.contains(job)) continue;
                        log.info("Job of " + job.size());
                        // Linear decay from the configured alpha towards 0, floored at minLearningRate.
                        double alpha = Math.max(Word2Vec.this.minLearningRate, Word2Vec.this.alpha.get() * (1.0 - 1.0 * (double)this.val$numWordsSoFar.get() / (double)Word2Vec.this.totalWords));
                        long diff = Math.abs(this.val$lastReport.get() - this.val$numWordsSoFar.get());
                        // Progress report roughly every 10000 words.
                        if (this.val$numWordsSoFar.get() > 0L && diff >= 10000L) {
                            log.info("Words so far " + this.val$numWordsSoFar.get() + " with alpha at " + alpha);
                            this.val$lastReport.set(this.val$numWordsSoFar.get());
                        }
                        long increment = 0L;
                        // diff2 is never updated, so the "avg took" message below always reports 0.
                        double diff2 = 0.0;
                        for (List<VocabWord> sentence : job) {
                            Word2Vec.this.trainSentence(sentence, nextRandom, alpha);
                            increment += (long)sentence.size();
                        }
                        log.info("Train sentence avg took " + diff2 / (double)job.size());
                        // NOTE(review): get() followed by set() is not atomic; concurrent workers
                        // can lose updates here (addAndGet would avoid that).
                        this.val$numWordsSoFar.set(this.val$numWordsSoFar.get() + increment);
                        processed.incrementAndGet();
                    }
                    return;
                }
            });
            t.setName("worker" + i);
            t.start();
            work.add(t);
        }
        // Random state used only by the sub-sampling in addWords().
        final AtomicLong nextRandom = new AtomicLong(5L);
        final AtomicInteger doc = new AtomicInteger(0);
        // numDocs is not used after this assignment.
        int numDocs = this.vectorizer.index().numDocuments() * this.numIterations;
        // Fixed-size pool passed to eachDoc; a rejected task waits one second and is resubmitted.
        ThreadPoolExecutor exec = new ThreadPoolExecutor(Runtime.getRuntime().availableProcessors(), Runtime.getRuntime().availableProcessors(), 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), new RejectedExecutionHandler(){

            @Override
            public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
                try {
                    Thread.sleep(1000L);
                }
                catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
                executor.submit(r);
            }
        });
        // Staging area: filtered sentences accumulate here until at least 100 are queued as one job.
        final ConcurrentLinkedDeque batch2 = new ConcurrentLinkedDeque();
        this.vectorizer.index().eachDoc(new Function<List<VocabWord>, Void>(){

            public Void apply(List<VocabWord> input) {
                ArrayList<VocabWord> batch = new ArrayList<VocabWord>();
                Word2Vec.this.addWords(input, nextRandom, batch);
                // Documents that end up empty after filtering are dropped entirely.
                if (batch.isEmpty()) {
                    return null;
                }
                if (batch2.size() >= 100) {
                    boolean added = false;
                    // Retries until add() succeeds; every exception is swallowed. Presumably this
                    // waits for room in the bounded jobQueue (add() throws when it is full), but
                    // it spins without backing off and would also hide any other failure.
                    // NOTE(review): copying batch2 and then clearing it is not one atomic step; if
                    // eachDoc runs this callback concurrently on exec, sentences added in between
                    // could be lost - confirm eachDoc's threading.
                    while (!added) {
                        try {
                            Word2Vec.this.jobQueue.add(new LinkedList(batch2));
                            batch2.clear();
                            added = true;
                        }
                        catch (Exception e) {}
                    }
                }
                // The same sentence list is staged once per training iteration.
                for (int i = 0; i < Word2Vec.this.numIterations; ++i) {
                    batch2.add(batch);
                }
                doc.incrementAndGet();
                if (doc.get() > 0 && doc.get() % 10000 == 0) {
                    log.info("Doc " + doc.get() + " done so far");
                }
                return null;
            }
        }, exec);
        // Flush whatever is still staged.
        // NOTE(review): this runs before exec is awaited; if eachDoc returns before its tasks
        // finish, sentences staged afterwards are never queued - confirm.
        if (!batch2.isEmpty()) {
            this.jobQueue.add(new LinkedList(batch2));
        }
        exec.shutdown();
        try {
            exec.awaitTermination(1L, TimeUnit.DAYS);
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        // Wait for the training threads to leave their loops.
        for (Thread t : work) {
            try {
                t.join();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Copies the non-null words of a sentence into a mini batch, randomly dropping
     * frequent words when sub-sampling is enabled ({@code sample > 0}).
     *
     * <p>For each word a keep-score {@code (sqrt(f / t) + 1) * t / f} is computed, with
     * {@code f} the word frequency and {@code t = sample * numDocuments}; the word is
     * dropped when the score is below a fresh pseudo-random draw in [0, 1).
     * NOTE(review): the reference word2vec formula scales {@code sample} by the total
     * number of training words, not documents - confirm which is intended.
     *
     * <p>The random state is advanced with the same linear congruential step that
     * trainSentence() uses, once per candidate word. Without that step every draw
     * would be the same constant and the sampling would be deterministic. The
     * get/set pair is not atomic; concurrent callers may repeat a draw, which only
     * affects the quality of the random stream.
     *
     * @param sentence      words of one document; null entries are skipped
     * @param nextRandom    shared pseudo-random state, advanced by this method
     * @param currMiniBatch receives the retained words in order
     */
    protected void addWords(List<VocabWord> sentence, AtomicLong nextRandom, List<VocabWord> currMiniBatch) {
        boolean subSampling = this.sample > 0.0;
        for (VocabWord word : sentence) {
            if (word == null) {
                continue;
            }
            if (subSampling) {
                double numDocs = this.vectorizer.index().numDocuments();
                double threshold = this.sample * numDocs;
                double ran = (Math.sqrt(word.getWordFrequency() / threshold) + 1.0) * threshold / word.getWordFrequency();
                long next = nextRandom.get() * 25214903917L + 11L;
                nextRandom.set(next);
                if (ran < (double)(next & 0xFFFFL) / 65536.0) {
                    continue;
                }
            }
            currMiniBatch.add(word);
        }
    }

    /**
     * Prepares the model for training: builds the Huffman tree over the vocabulary
     * and, when {@code shouldReset} is set, resets the weights. The "Resetting
     * weights" message is logged in either case.
     */
    public void setup() {
        log.info("Building binary tree");
        this.buildBinaryTree();
        log.info("Resetting weights");
        if (!this.shouldReset) {
            return;
        }
        this.resetWeights();
    }

    /**
     * Makes sure a vocabulary is available.
     *
     * <p>If the cache reports an existing vocabulary it is loaded and the cache's
     * weights are reset. Otherwise a TF-IDF vectorizer is created (unless one was
     * supplied), fitted, and {@link #setup()} is run.
     *
     * @return {@code true} when an existing vocabulary was loaded, {@code false}
     *         when it was built from the configured iterators
     */
    public boolean buildVocab() {
        this.readStopWords();
        if (!this.cache.vocabExists()) {
            if (this.vectorizer == null) {
                this.vectorizer = new TfidfVectorizer.Builder()
                        .cache(this.cache)
                        .iterate(this.docIter)
                        .iterate(this.sentenceIter)
                        .batchSize(this.batchSize)
                        .minWords(this.minWordFrequency)
                        .stopWords(this.stopWords)
                        .tokenize(this.tokenizerFactory)
                        .build();
            }
            this.vectorizer.fit();
            this.setup();
            return false;
        }
        log.info("Loading vocab...");
        this.cache.loadVocab();
        this.cache.resetWeights();
        return true;
    }

    /**
     * Trains on every position of a sentence with the skip-gram objective.
     *
     * <p>Before each position the random state is advanced with a linear
     * congruential step and a window reduction {@code b} in {@code [0, window)} is
     * derived from it. The remainder is taken on the full 64-bit state and made
     * non-negative: truncating to {@code int} first can give a negative {@code b},
     * which makes skipGram() look further than {@code window} words away.
     *
     * @param sentence   the words to train on; null or empty sentences are ignored
     * @param nextRandom pseudo-random state, advanced once per position
     * @param alpha      learning rate for this sentence
     */
    public void trainSentence(List<VocabWord> sentence, AtomicLong nextRandom, double alpha) {
        if (sentence == null || sentence.isEmpty()) {
            return;
        }
        for (int i = 0; i < sentence.size(); ++i) {
            long next = nextRandom.get() * 25214903917L + 11L;
            nextRandom.set(next);
            // |next % window| < window, so the narrowing cast is safe; a non-positive
            // window leaves the context unreduced instead of dividing by zero.
            int b = this.window > 0 ? (int)Math.abs(next % (long)this.window) : 0;
            this.skipGram(i, sentence, b, nextRandom, alpha);
        }
    }

    /**
     * Trains the word at position {@code i} against its context words.
     *
     * <p>Offsets {@code a} run from {@code b} to {@code 2 * window - b}; the context
     * position is {@code i - window + a}. The centre ({@code a == window}) and
     * positions outside the sentence are skipped.
     *
     * @param i          index of the centre word within {@code sentence}
     * @param sentence   the sentence being trained; null or empty sentences are ignored
     * @param b          window reduction for this position
     * @param nextRandom pseudo-random state forwarded to {@link #iterate}
     * @param alpha      learning rate
     */
    public void skipGram(int i, List<VocabWord> sentence, int b, AtomicLong nextRandom, double alpha) {
        // The emptiness check has to come before the lookup to have any effect.
        if (sentence == null || sentence.isEmpty()) {
            return;
        }
        VocabWord word = sentence.get(i);
        if (word == null) {
            return;
        }
        int end = this.window * 2 + 1 - b;
        for (int a = b; a < end; ++a) {
            if (a == this.window) {
                continue;
            }
            int c = i - this.window + a;
            if (c < 0 || c >= sentence.size()) {
                continue;
            }
            VocabWord lastWord = sentence.get(c);
            this.iterate(word, lastWord, nextRandom, alpha);
        }
    }

    /**
     * Performs one training step for a (centre word, context word) pair by
     * delegating to the cache's {@code iterateSample}.
     *
     * @param w1         the centre word
     * @param w2         the context word
     * @param nextRandom pseudo-random state handed to the cache
     * @param alpha      learning rate
     */
    public void iterate(VocabWord w1, VocabWord w2, AtomicLong nextRandom, double alpha) {
        VocabCache store = this.cache;
        store.iterateSample(w1, w2, nextRandom, alpha);
    }

    /**
     * Builds a Huffman tree over the cache's vocabulary words, logging before and
     * after.
     */
    protected void buildBinaryTree() {
        log.info("Constructing priority queue");
        new Huffman(this.cache.vocabWords()).build();
        log.info("Built tree");
    }

    /** Asks the cache to reset its weights. */
    protected void resetWeights() {
        VocabCache store = this.cache;
        store.resetWeights();
    }

    /**
     * Computes the similarity of two words as the dot product of their unit vectors.
     *
     * <p>Words unknown to the cache are represented by the {@link #UNK} vector.
     * The presence of both vectors is checked before they are normalized, so a
     * missing vector yields the documented {@code -1.0} instead of failing inside
     * the normalization.
     *
     * @param word  first word
     * @param word2 second word
     * @return {@code 1.0} for equal words, {@code -1.0} when a vector is
     *         unavailable, otherwise the dot product of the two unit vectors
     */
    public double similarity(String word, String word2) {
        if (word.equals(word2)) {
            return 1.0;
        }
        INDArray raw = this.getWordVectorMatrix(word);
        INDArray raw2 = this.getWordVectorMatrix(word2);
        if (raw == null || raw2 == null) {
            return -1.0;
        }
        INDArray vector = Transforms.unitVec(raw);
        INDArray vector2 = Transforms.unitVec(raw2);
        if (vector == null || vector2 == null) {
            return -1.0;
        }
        return Nd4j.getBlasWrapper().dot(vector, vector2);
    }

    /** Loads the default stop-word list unless stop words are already set. */
    protected void readStopWords() {
        if (this.stopWords == null) {
            this.stopWords = StopWords.getStopWords();
        }
    }

    /**
     * Serializes this model to a stream using Java object serialization.
     *
     * <p>Fields declared {@code transient} are not written. The object stream is
     * flushed before returning so that no buffered bytes are left behind; the
     * supplied stream itself is left open for the caller to close.
     *
     * @param os destination stream
     * @throws RuntimeException wrapping any {@link IOException} raised while writing
     */
    public void write(OutputStream os) {
        try {
            ObjectOutputStream dos = new ObjectOutputStream(os);
            dos.writeObject(this);
            dos.flush();
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads a serialized {@code Word2Vec} from a stream and copies a subset of its
     * settings into this instance: alpha, minWordFrequency, sample, stopWords,
     * topNSize and window. No other field is touched.
     *
     * <p>NOTE(review): this uses Java native deserialization; only feed it streams
     * from a trusted source.
     *
     * @param is stream positioned at a serialized {@code Word2Vec}
     * @throws RuntimeException wrapping any exception raised while reading
     */
    public void load(InputStream is) {
        try {
            ObjectInputStream in = new ObjectInputStream(is);
            Word2Vec source = (Word2Vec)in.readObject();
            this.window = source.window;
            this.topNSize = source.topNSize;
            this.stopWords = source.stopWords;
            this.sample = source.sample;
            this.minWordFrequency = source.minWordFrequency;
            this.alpha = source.alpha;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the sentence iterator and turns off the weight reset that
     * {@link #setup()} would otherwise perform.
     *
     * @param sentenceIter the new sentence source
     */
    public void setSentenceIter(SentenceIterator sentenceIter) {
        this.shouldReset = false;
        this.sentenceIter = sentenceIter;
    }

    /** Re-enables the weight reset performed by {@link #setup()}. */
    public void resetWeightsOnSetup() {
        shouldReset = true;
    }

    /** @return the configured word-vector length */
    public int getLayerSize() {
        return layerSize;
    }

    /**
     * Sets the word-vector length.
     *
     * @param layerSize the new vector length
     */
    public void setLayerSize(int layerSize) {
        Word2Vec.this.layerSize = layerSize;
    }

    /** @return the context window size */
    public int getWindow() {
        return window;
    }

    /** @return the stop-word list held by this model (the live list, not a copy); may be null if not yet loaded */
    public List<String> getStopWords() {
        return stopWords;
    }

    /** @return the sentence iterator, or null when none is configured */
    public synchronized SentenceIterator getSentenceIter() {
        return sentenceIter;
    }

    /** @return the tokenizer factory used when the vocabulary is built */
    public TokenizerFactory getTokenizerFactory() {
        return tokenizerFactory;
    }

    /**
     * Replaces the tokenizer factory.
     *
     * @param tokenizerFactory the factory to use when the vocabulary is built
     */
    public void setTokenizerFactory(TokenizerFactory tokenizerFactory) {
        Word2Vec.this.tokenizerFactory = tokenizerFactory;
    }

    /** @return the vocabulary / vector cache backing this model */
    public VocabCache getCache() {
        return cache;
    }

    /**
     * Replaces the cache. When the new cache is an {@code InMemoryLookupCache}
     * with a non-null syn0 matrix, {@code layerSize} is aligned with the number
     * of columns of that matrix.
     *
     * @param cache the cache to use; may be null
     */
    public void setCache(VocabCache cache) {
        this.cache = cache;
        if (!(cache instanceof InMemoryLookupCache)) {
            return;
        }
        InMemoryLookupCache inMemory = (InMemoryLookupCache)cache;
        if (inMemory.getSyn0() == null) {
            return;
        }
        if (inMemory.getSyn0().columns() != this.layerSize) {
            this.layerSize = inMemory.getSyn0().columns();
        }
    }

    /**
     * Fluent builder for {@link Word2Vec}. Every setter stores its argument and
     * returns the builder; {@link #build()} copies the collected settings into a
     * new model.
     */
    public static class Builder {
        protected int minWordFrequency = 1;
        protected int layerSize = 50;
        protected SentenceIterator iter;
        protected List<String> stopWords = StopWords.getStopWords();
        protected int window = 5;
        protected TokenizerFactory tokenizerFactory;
        protected VocabCache vocabCache;
        protected DocumentIterator docIter;
        // NOTE(review): ten times Word2Vec's own default alpha of 0.025 - confirm this is intended.
        protected double lr = 0.25;
        protected int iterations = 1;
        protected long seed = 123L;
        protected boolean saveVocab = false;
        protected int batchSize = 1000;
        protected int learningRateDecayWords = 10000;
        protected boolean useAdaGrad = false;
        protected TextVectorizer textVectorizer;
        protected double minLearningRate = 0.01;
        protected double negative = 0.0;
        protected double sampling = 1.0E-5;
        protected int workers = Runtime.getRuntime().availableProcessors();

        /** Sets the number of training threads. */
        public Builder workers(int workers) {
            this.workers = workers;
            return this;
        }

        /** Sets the sub-sampling parameter copied to the model's {@code sample} field. */
        public Builder sampling(double sample) {
            this.sampling = sample;
            return this;
        }

        /** Sets the negative-sampling value passed to a default-created cache. */
        public Builder negativeSample(double negative) {
            this.negative = negative;
            return this;
        }

        /** Sets the floor of the decayed learning rate. */
        public Builder minLearningRate(double minLearningRate) {
            this.minLearningRate = minLearningRate;
            return this;
        }

        /** Enables or disables AdaGrad (also passed to a default-created cache). */
        public Builder useAdaGrad(boolean useAdaGrad) {
            this.useAdaGrad = useAdaGrad;
            return this;
        }

        /** Supplies a ready-made vectorizer instead of the default TF-IDF one. */
        public Builder vectorizer(TextVectorizer textVectorizer) {
            this.textVectorizer = textVectorizer;
            return this;
        }

        /** Sets the value copied to the model's {@code learningRateDecayWords} field. */
        public Builder learningRateDecayWords(int learningRateDecayWords) {
            this.learningRateDecayWords = learningRateDecayWords;
            return this;
        }

        /** Sets the batch size forwarded to the vectorizer. */
        public Builder batchSize(int batchSize) {
            this.batchSize = batchSize;
            return this;
        }

        /** Chooses whether a freshly built vocabulary is saved by {@code fit()}. */
        public Builder saveVocab(boolean saveVocab) {
            this.saveVocab = saveVocab;
            return this;
        }

        /** Sets the seed copied to the model. */
        public Builder seed(long seed) {
            this.seed = seed;
            return this;
        }

        /** Sets the number of training iterations. */
        public Builder iterations(int iterations) {
            this.iterations = iterations;
            return this;
        }

        /** Sets the starting learning rate. */
        public Builder learningRate(double lr) {
            this.lr = lr;
            return this;
        }

        /** Sets the document iterator. */
        public Builder iterate(DocumentIterator iter) {
            this.docIter = iter;
            return this;
        }

        /** Supplies the vocabulary cache; when absent, build() creates an in-memory one. */
        public Builder vocabCache(VocabCache cache) {
            this.vocabCache = cache;
            return this;
        }

        /** Sets the minimum word frequency forwarded to the vectorizer. */
        public Builder minWordFrequency(int minWordFrequency) {
            this.minWordFrequency = minWordFrequency;
            return this;
        }

        /** Supplies the tokenizer factory; when absent, build() creates a UIMA one. */
        public Builder tokenizerFactory(TokenizerFactory tokenizerFactory) {
            this.tokenizerFactory = tokenizerFactory;
            return this;
        }

        /** Sets the word-vector length. */
        public Builder layerSize(int layerSize) {
            this.layerSize = layerSize;
            return this;
        }

        /** Replaces the default stop-word list. */
        public Builder stopWords(List<String> stopWords) {
            this.stopWords = stopWords;
            return this;
        }

        /** Sets the context window size. */
        public Builder windowSize(int window) {
            this.window = window;
            return this;
        }

        /** Sets the sentence iterator. */
        public Builder iterate(SentenceIterator iter) {
            this.iter = iter;
            return this;
        }

        /**
         * Creates a model from the collected settings.
         *
         * <p>Missing collaborators are created on demand and remembered by the
         * builder: a {@code UimaTokenizerFactory} when no tokenizer factory was
         * given, and an {@code InMemoryLookupCache} (configured with negative,
         * useAdaGrad, lr and layerSize) when no cache was given.
         *
         * @return the configured model
         * @throws RuntimeException wrapping any exception raised while creating the
         *         default tokenizer factory
         */
        public Word2Vec build() {
            Word2Vec ret = new Word2Vec();
            ret.layerSize = this.layerSize;
            ret.window = this.window;
            ret.alpha.set(this.lr);
            ret.minLearningRate = this.minLearningRate;
            ret.useAdaGrad = this.useAdaGrad;
            ret.learningRateDecayWords = this.learningRateDecayWords;
            ret.vectorizer = this.textVectorizer;
            ret.stopWords = this.stopWords;
            ret.minWordFrequency = this.minWordFrequency;
            ret.numIterations = this.iterations;
            ret.seed = this.seed;
            ret.saveVocab = this.saveVocab;
            ret.batchSize = this.batchSize;
            ret.sample = this.sampling;
            ret.workers = this.workers;
            // Either iterator may be null; the model handles absent iterators itself.
            ret.sentenceIter = this.iter;
            ret.docIter = this.docIter;
            // Must follow the layerSize assignment: setCache may override layerSize
            // with the column count of a supplied in-memory cache.
            ret.setCache(this.vocabCache);
            if (this.tokenizerFactory == null) {
                try {
                    this.tokenizerFactory = new UimaTokenizerFactory();
                }
                catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            if (this.vocabCache == null) {
                this.vocabCache = new InMemoryLookupCache.Builder().negative(this.negative).useAdaGrad(this.useAdaGrad).lr(this.lr).vectorLength(this.layerSize).build();
                ret.cache = this.vocabCache;
            }
            ret.tokenizerFactory = this.tokenizerFactory;
            return ret;
        }
    }
}

