/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.text.tokenization.tokenizerfactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import lombok.NonNull;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceStreamTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.BertWordPiecePreProcessor;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;

/**
 * {@link TokenizerFactory} that produces BERT word-piece tokenizers backed by a
 * token-to-index vocabulary.
 *
 * <p>The vocabulary can be supplied directly as a {@link NavigableMap} or loaded
 * from a file / stream containing one token per line (see
 * {@link #loadVocab(InputStream, Charset)}).
 *
 * <p>Note: the charset used by {@link #create(InputStream)} is only set by the
 * constructors that take a {@link Charset}; factories built from an in-memory
 * vocabulary hand a {@code null} charset to the stream tokenizer.
 */
public class BertWordPieceTokenizerFactory
implements TokenizerFactory {
    /** Token to index mapping shared with every tokenizer this factory creates. */
    private final NavigableMap<String, Integer> vocab;
    /** Pre-processor handed to each tokenizer as its pre-tokenize pre-processor. */
    private TokenPreProcess preTokenizePreProcessor;
    /** Pre-processor handed to each tokenizer as its per-token pre-processor; may be null. */
    private TokenPreProcess tokenPreProcessor;
    /** Charset passed to stream tokenizers; null unless a charset-taking constructor was used. */
    private Charset charset;

    /**
     * Creates a factory from an in-memory vocabulary, using a
     * {@link BertWordPiecePreProcessor} built from the given flags and vocabulary
     * as the pre-tokenize pre-processor.
     *
     * @param vocab token to index mapping
     * @param lowerCaseOnly forwarded to {@link BertWordPiecePreProcessor}
     * @param stripAccents forwarded to {@link BertWordPiecePreProcessor}
     */
    public BertWordPieceTokenizerFactory(NavigableMap<String, Integer> vocab, boolean lowerCaseOnly, boolean stripAccents) {
        this(vocab, new BertWordPiecePreProcessor(lowerCaseOnly, stripAccents, vocab));
    }

    /**
     * Creates a factory from an in-memory vocabulary and an explicit
     * pre-tokenize pre-processor.
     *
     * @param vocab token to index mapping
     * @param preTokenizePreProcessor pre-processor handed to every created tokenizer
     */
    public BertWordPieceTokenizerFactory(NavigableMap<String, Integer> vocab, TokenPreProcess preTokenizePreProcessor) {
        this.vocab = vocab;
        this.preTokenizePreProcessor = preTokenizePreProcessor;
    }

    /**
     * Creates a factory whose vocabulary is read from a file (one token per line).
     *
     * @param pathToVocab vocabulary file
     * @param lowerCaseOnly forwarded to {@link BertWordPiecePreProcessor}
     * @param stripAccents forwarded to {@link BertWordPiecePreProcessor}
     * @param charset charset of the vocabulary file; also used by {@link #create(InputStream)}
     * @throws IOException if the vocabulary cannot be read
     * @throws NullPointerException if {@code charset} is null
     */
    public BertWordPieceTokenizerFactory(File pathToVocab, boolean lowerCaseOnly, boolean stripAccents, @NonNull Charset charset) throws IOException {
        // The null check has to live inside the this(...) argument: statements placed
        // after this(...) run only once the vocabulary has already been loaded.
        this(BertWordPieceTokenizerFactory.loadVocab(pathToVocab, BertWordPieceTokenizerFactory.requireCharset(charset)), lowerCaseOnly, stripAccents);
        this.charset = charset;
    }

    /**
     * Creates a factory whose vocabulary is read from a stream (one token per line).
     * The stream is closed once it has been read.
     *
     * @param vocabInputStream stream providing the vocabulary
     * @param lowerCaseOnly forwarded to {@link BertWordPiecePreProcessor}
     * @param stripAccents forwarded to {@link BertWordPiecePreProcessor}
     * @param charset charset of the vocabulary stream; also used by {@link #create(InputStream)}
     * @throws IOException if the vocabulary cannot be read
     * @throws NullPointerException if {@code charset} is null
     */
    public BertWordPieceTokenizerFactory(InputStream vocabInputStream, boolean lowerCaseOnly, boolean stripAccents, @NonNull Charset charset) throws IOException {
        // See the File-based constructor for why the check is part of the argument.
        this(BertWordPieceTokenizerFactory.loadVocab(vocabInputStream, BertWordPieceTokenizerFactory.requireCharset(charset)), lowerCaseOnly, stripAccents);
        this.charset = charset;
    }

    /**
     * Creates a word-piece tokenizer over the given text using this factory's
     * vocabulary and pre-processors.
     *
     * @param toTokenize text to tokenize
     * @return a new {@link BertWordPieceTokenizer}
     */
    @Override
    public Tokenizer create(String toTokenize) {
        return new BertWordPieceTokenizer(toTokenize, this.vocab, this.preTokenizePreProcessor, this.tokenPreProcessor);
    }

    /**
     * Creates a word-piece tokenizer over the given stream using this factory's
     * charset, vocabulary and pre-processors.
     *
     * @param toTokenize stream providing the text to tokenize
     * @return a new {@link BertWordPieceStreamTokenizer}
     */
    @Override
    public Tokenizer create(InputStream toTokenize) {
        return new BertWordPieceStreamTokenizer(toTokenize, this.charset, this.vocab, this.preTokenizePreProcessor, this.tokenPreProcessor);
    }

    /**
     * Returns a read-only view of the vocabulary.
     *
     * @return unmodifiable token to index map
     */
    public Map<String, Integer> getVocab() {
        return Collections.unmodifiableMap(this.vocab);
    }

    /**
     * Reads a vocabulary from a stream containing one token per line. Each token is
     * mapped to its zero-based line number; if a token occurs more than once the
     * last occurrence wins. The returned map orders its keys in reverse natural
     * order (NOTE(review): presumably so that longer tokens sharing a prefix are
     * visited before shorter ones - confirm against the tokenizer).
     *
     * <p>The stream is always closed, including when reading fails.
     *
     * @param is stream providing the vocabulary
     * @param charset charset used to decode the stream
     * @return token to index map
     * @throws IOException if reading fails
     * @throws NullPointerException if {@code charset} is null
     */
    public static NavigableMap<String, Integer> loadVocab(InputStream is, Charset charset) throws IOException {
        TreeMap<String, Integer> map = new TreeMap<String, Integer>(Collections.reverseOrder());
        // Managing the raw stream as its own resource guarantees it is closed even
        // if building the reader chain throws.
        try (InputStream in = is;
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, BertWordPieceTokenizerFactory.requireCharset(charset)))) {
            int index = 0;
            for (String token = reader.readLine(); token != null; token = reader.readLine()) {
                map.put(token, index++);
            }
        }
        return map;
    }

    /**
     * Reads a vocabulary from a file containing one token per line; see
     * {@link #loadVocab(InputStream, Charset)} for the resulting mapping.
     *
     * @param vocabFile vocabulary file
     * @param charset charset used to decode the file
     * @return token to index map
     * @throws IOException if the file cannot be opened or read
     * @throws NullPointerException if {@code charset} is null
     */
    public static NavigableMap<String, Integer> loadVocab(File vocabFile, Charset charset) throws IOException {
        // Validate before opening so a bad argument never costs a file handle.
        BertWordPieceTokenizerFactory.requireCharset(charset);
        try (InputStream in = new FileInputStream(vocabFile)) {
            return BertWordPieceTokenizerFactory.loadVocab(in, charset);
        }
    }

    /**
     * Returns the pre-processor handed to tokenizers as pre-tokenize pre-processor.
     *
     * @return the current pre-tokenize pre-processor
     */
    public TokenPreProcess getPreTokenizePreProcessor() {
        return this.preTokenizePreProcessor;
    }

    /**
     * Replaces the pre-tokenize pre-processor used for tokenizers created afterwards.
     *
     * @param preTokenizePreProcessor the new pre-tokenize pre-processor
     */
    public void setPreTokenizePreProcessor(TokenPreProcess preTokenizePreProcessor) {
        this.preTokenizePreProcessor = preTokenizePreProcessor;
    }

    /**
     * Returns the per-token pre-processor handed to created tokenizers.
     *
     * @return the current token pre-processor, or null if none was set
     */
    @Override
    public TokenPreProcess getTokenPreProcessor() {
        return this.tokenPreProcessor;
    }

    /**
     * Replaces the per-token pre-processor used for tokenizers created afterwards.
     *
     * @param tokenPreProcessor the new token pre-processor
     */
    @Override
    public void setTokenPreProcessor(TokenPreProcess tokenPreProcessor) {
        this.tokenPreProcessor = tokenPreProcessor;
    }

    /** Returns {@code charset} unchanged, or throws the standard non-null violation if it is null. */
    private static Charset requireCharset(Charset charset) {
        if (charset == null) {
            throw new NullPointerException("charset is marked non-null but is null");
        }
        return charset;
    }
}

