/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.word2vec.wordstore;

import java.beans.ConstructorProperties;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import lombok.NonNull;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.VocabularyHolder;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class VocabConstructor {
    /** Corpora to scan, each paired with its own minimum word frequency. */
    private List<VocabSource> sources = new ArrayList<VocabSource>();
    /** Creates a {@link Tokenizer} for every sentence read from a source. */
    private TokenizerFactory tokenizerFactory;
    /**
     * Target cache that receives the joint vocabulary. When still {@code null} at build time,
     * {@link #buildJointVocabulary(boolean, boolean)} replaces it with a new
     * {@link InMemoryLookupCache}.
     */
    private VocabCache cache;
    /** Tokens contained in this list are skipped while counting words. */
    private List<String> stopWords;
    /**
     * Copied from the builder.
     * NOTE(review): not read anywhere in the code visible in this file - confirm whether it is
     * still needed.
     */
    private boolean useAdaGrad = false;
    protected static final Logger log = LoggerFactory.getLogger(VocabConstructor.class);

    /** Not instantiable directly; instances are created through {@link Builder#build()}. */
    private VocabConstructor() {
    }

    /**
     * Scans every registered source, counts its tokens and merges the results into the target
     * vocabulary cache.
     *
     * <p>For each source a temporary {@link VocabularyHolder} is created with that source's
     * minimum word frequency, filled with token counts, truncated, and then consumed by a joint
     * holder that wraps the target cache. Once all sources are processed the joint holder is
     * transferred back to the cache.
     *
     * <p>If no target cache was configured, a new {@link InMemoryLookupCache} is created and
     * retained by this instance.
     *
     * @param resetCounters    if {@code true}, {@code resetWordCounters()} is invoked on the joint
     *                         holder before it is transferred to the cache
     * @param buildHuffmanTree if {@code true}, {@code updateHuffmanCodes()} is invoked on the joint
     *                         holder before it is transferred to the cache
     * @return the target cache, after the joint vocabulary has been transferred into it
     * @throws IllegalStateException if both {@code resetCounters} and {@code buildHuffmanTree} are
     *                               {@code true}
     * @throws NullPointerException  if no tokenizer factory was configured and a source yields at
     *                               least one sentence
     */
    public VocabCache buildJointVocabulary(boolean resetCounters, boolean buildHuffmanTree) {
        if (resetCounters && buildHuffmanTree) {
            throw new IllegalStateException("You can't reset counters and build Huffman tree at the same time!");
        }
        if (this.cache == null) {
            this.cache = new InMemoryLookupCache(false);
        }

        // Snapshot the stop words into a hash set once: the lookup runs for every token of every
        // sentence, and List.contains would make each of those lookups a linear scan.
        Set<String> stopWordSet = this.stopWords == null
                ? new HashSet<String>()
                : new HashSet<String>(this.stopWords);

        VocabularyHolder topHolder = new VocabularyHolder.Builder().externalCache(this.cache).minWordFrequency(0).build();
        for (VocabSource source : this.sources) {
            VocabularyHolder tempHolder = this.countSourceTokens(source, stopWordSet);
            log.info("Vocab size before truncation: {}", tempHolder.numWords());
            tempHolder.truncateVocabulary();
            log.info("Vocab size after truncation: {}", tempHolder.numWords());
            topHolder.consumeVocabulary(tempHolder);
        }

        if (resetCounters) {
            topHolder.resetWordCounters();
        }
        if (buildHuffmanTree) {
            topHolder.updateHuffmanCodes();
        }
        topHolder.transferBackToVocabCache(this.cache);
        return this.cache;
    }

    /**
     * Counts the tokens of a single source into a fresh holder configured with that source's
     * minimum word frequency. The source iterator is reset before reading.
     *
     * @param source      the source whose sentences are tokenized and counted
     * @param stopWordSet tokens to ignore
     * @return the holder containing the (not yet truncated) word counts of {@code source}
     */
    private VocabularyHolder countSourceTokens(VocabSource source, Set<String> stopWordSet) {
        SentenceIterator iterator = source.getIterator();
        iterator.reset();
        VocabularyHolder holder = new VocabularyHolder.Builder().minWordFrequency(source.getMinWordFrequency()).build();
        while (iterator.hasNext()) {
            String line = iterator.nextSentence();
            Tokenizer tokenizer = this.tokenizerFactory.create(line);
            List<String> tokens = tokenizer.getTokens();
            for (String token : tokens) {
                // Reject null/empty tokens before the stop-word lookup so the lookup is never
                // handed a null.
                if (token == null || token.isEmpty() || stopWordSet.contains(token)) {
                    continue;
                }
                // First sighting registers the word; later sightings bump its counter.
                if (holder.containsWord(token)) {
                    holder.incrementWordCounter(token);
                } else {
                    holder.addWord(token);
                }
            }
        }
        return holder;
    }

    /**
     * Pairs a sentence iterator with the minimum word frequency to apply to the vocabulary
     * collected from it. The iterator is never {@code null}: both the constructor and the setter
     * reject {@code null}.
     */
    private static class VocabSource {
        @NonNull
        private SentenceIterator iterator;
        // No @NonNull here: the annotation has no meaning on a primitive.
        private int minWordFrequency;

        /**
         * @param iterator         the sentences of this source
         * @param minWordFrequency minimum word frequency used for this source's vocabulary
         * @throws NullPointerException if {@code iterator} is {@code null}
         */
        @ConstructorProperties(value={"iterator", "minWordFrequency"})
        public VocabSource(@NonNull SentenceIterator iterator, int minWordFrequency) {
            if (iterator == null) {
                throw new NullPointerException("iterator");
            }
            this.iterator = iterator;
            this.minWordFrequency = minWordFrequency;
        }

        /** @return the sentence iterator of this source */
        @NonNull
        public SentenceIterator getIterator() {
            return this.iterator;
        }

        /** @return the minimum word frequency configured for this source */
        public int getMinWordFrequency() {
            return this.minWordFrequency;
        }

        /**
         * @param iterator the new sentence iterator
         * @throws NullPointerException if {@code iterator} is {@code null}
         */
        public void setIterator(@NonNull SentenceIterator iterator) {
            if (iterator == null) {
                throw new NullPointerException("iterator");
            }
            this.iterator = iterator;
        }

        /** @param minWordFrequency the new minimum word frequency */
        public void setMinWordFrequency(int minWordFrequency) {
            this.minWordFrequency = minWordFrequency;
        }

        /**
         * Two sources are equal when their iterators are equal and their minimum word
         * frequencies match.
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof VocabSource)) {
                return false;
            }
            VocabSource other = (VocabSource)o;
            if (!other.canEqual(this)) {
                return false;
            }
            SentenceIterator mine = this.getIterator();
            SentenceIterator theirs = other.getIterator();
            boolean sameIterator = mine == null ? theirs == null : mine.equals(theirs);
            return sameIterator && this.getMinWordFrequency() == other.getMinWordFrequency();
        }

        /** Hook that lets {@link #equals(Object)} ask the other side whether it accepts this instance. */
        protected boolean canEqual(Object other) {
            return other instanceof VocabSource;
        }

        /** Combines the iterator's hash code and the minimum word frequency. */
        @Override
        public int hashCode() {
            final int PRIME = 59;
            SentenceIterator current = this.getIterator();
            int result = 1;
            result = result * PRIME + (current == null ? 0 : current.hashCode());
            result = result * PRIME + this.getMinWordFrequency();
            return result;
        }

        @Override
        public String toString() {
            return "VocabConstructor.VocabSource(iterator=" + this.getIterator() + ", minWordFrequency=" + this.getMinWordFrequency() + ")";
        }
    }

    /**
     * Fluent builder for {@link VocabConstructor}. Collects the sources, tokenizer factory,
     * target cache and stop words, then hands them to a new instance in {@link #build()}.
     */
    public static class Builder {
        private List<VocabSource> sources = new ArrayList<VocabSource>();
        private TokenizerFactory tokenizerFactory;
        private VocabCache cache;
        private List<String> stopWords = new ArrayList<String>();
        private boolean useAdaGrad = false;

        /**
         * Stores the AdaGrad flag that is passed on to the built instance.
         *
         * @param useAdaGrad the flag value
         * @return this builder
         */
        public Builder useAdaGrad(boolean useAdaGrad) {
            this.useAdaGrad = useAdaGrad;
            return this;
        }

        /**
         * Sets the cache that will receive the joint vocabulary.
         *
         * @param cache the target cache
         * @return this builder
         * @throws NullPointerException if {@code cache} is {@code null}
         */
        public Builder setTargetVocabCache(@NonNull VocabCache cache) {
            if (cache == null) {
                throw new NullPointerException("cache");
            }
            this.cache = cache;
            return this;
        }

        /**
         * Registers one more source to scan.
         *
         * @param iterator         the sentences of the source
         * @param minWordFrequency minimum word frequency applied to this source's vocabulary
         * @return this builder
         * @throws NullPointerException if {@code iterator} is {@code null}
         */
        public Builder addSource(SentenceIterator iterator, int minWordFrequency) {
            this.sources.add(new VocabSource(iterator, minWordFrequency));
            return this;
        }

        /**
         * Sets the factory used to tokenize each sentence.
         *
         * @param factory the tokenizer factory
         * @return this builder
         * @throws NullPointerException if {@code factory} is {@code null}
         */
        public Builder setTokenizerFactory(@NonNull TokenizerFactory factory) {
            if (factory == null) {
                throw new NullPointerException("factory");
            }
            this.tokenizerFactory = factory;
            return this;
        }

        /**
         * Replaces the stop-word list (empty by default). The list is stored by reference, not
         * copied.
         *
         * @param stopWords tokens to ignore while counting
         * @return this builder
         * @throws NullPointerException if {@code stopWords} is {@code null}
         */
        public Builder setStopWords(@NonNull List<String> stopWords) {
            if (stopWords == null) {
                throw new NullPointerException("stopWords");
            }
            this.stopWords = stopWords;
            return this;
        }

        /**
         * Creates a {@link VocabConstructor} from the current builder state.
         *
         * @return a new, configured instance
         */
        public VocabConstructor build() {
            VocabConstructor constructor = new VocabConstructor();
            // Copy the source list so that further addSource() calls on this builder cannot
            // change an instance that has already been built.
            constructor.sources = new ArrayList<VocabSource>(this.sources);
            constructor.tokenizerFactory = this.tokenizerFactory;
            constructor.cache = this.cache;
            constructor.stopWords = this.stopWords;
            constructor.useAdaGrad = this.useAdaGrad;
            return constructor;
        }
    }
}

