/*
 * Decompiled with CFR 0.152.
 */
package ai.djl.huggingface.tokenizers;

import ai.djl.huggingface.tokenizers.Encoding;
import ai.djl.huggingface.tokenizers.PadTokenResolver;
import ai.djl.huggingface.tokenizers.TokenizerConfig;
import ai.djl.huggingface.tokenizers.jni.CharSpan;
import ai.djl.huggingface.tokenizers.jni.LibUtils;
import ai.djl.huggingface.tokenizers.jni.TokenizersLibrary;
import ai.djl.modality.nlp.preprocess.Tokenizer;
import ai.djl.ndarray.NDManager;
import ai.djl.translate.ArgumentsUtil;
import ai.djl.util.Ec2Utils;
import ai.djl.util.NativeResource;
import ai.djl.util.PairList;
import ai.djl.util.Platform;
import ai.djl.util.Utils;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.Normalizer;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class HuggingFaceTokenizer
extends NativeResource<Long>
implements Tokenizer {
    /** SLF4J logger shared by the tokenizer and its nested builder. */
    private static final Logger logger = LoggerFactory.getLogger(HuggingFaceTokenizer.class);
    /** Default {@code addSpecialTokens} flag for the encode overloads that do not take one. */
    private boolean addSpecialTokens;
    /** Default {@code withOverflowingTokens} flag for the encode overloads that do not take one. */
    private boolean withOverflowingTokens;
    /** Locale used to lower-case input text before encoding; {@code null} disables lower-casing. */
    private Locale doLowerCase;
    /** Truncation strategy; {@code LONGEST_FIRST} unless the {@code truncation} option is given. */
    private TruncationStrategy truncation = TruncationStrategy.LONGEST_FIRST;
    /** Padding strategy; {@code LONGEST} unless the {@code padding} option is given. */
    private PaddingStrategy padding = PaddingStrategy.LONGEST;
    /** Maximum sequence length: read from the native tokenizer, optionally overridden, then clamped. */
    private int maxLength;
    /** Stride handed to the native truncation setup. */
    private int stride;
    /** Padding multiple handed to the native padding setup; 0 skips the maxLength rounding step. */
    private int padToMultipleOf;
    /** Upper bound applied to {@code maxLength}; 512 unless overridden by options or config. */
    private int modelMaxLength;
    /** When true, {@code decode} post-processes its result with {@code cleanUpTokenization}. */
    private boolean cleanupTokenizationSpaces;
    /** When true, {@code prepareForTokenization} NFKD-normalizes text and removes combining diacritical marks. */
    private boolean stripAccents;
    /** When true, {@code prepareForTokenization} prepends a space to text that does not start with one. */
    private boolean addPrefixSpace;

    /**
     * Wraps a native tokenizer handle and works out the effective settings for it.
     *
     * <p>{@code maxLength}, {@code stride} and {@code padToMultipleOf} start from the values
     * reported by the native tokenizer and may be replaced by same-named entries in
     * {@code options}. When {@code options} is {@code null}, {@code addSpecialTokens} defaults to
     * {@code true} and {@code modelMaxLength} to 512. A non-null {@code config} is then handed to
     * {@code applyConfig}, and finally truncation and padding are configured on the native side.
     *
     * @param handle the native tokenizer handle
     * @param options string-valued options, may be {@code null}
     * @param config parsed tokenizer configuration, may be {@code null}
     * @param padInfo pad token information forwarded to the padding setup, may be {@code null}
     * @throws IllegalArgumentException if the {@code truncation} or {@code padding} option is not a
     *     recognized strategy
     */
    private HuggingFaceTokenizer(long handle, Map<String, String> options, TokenizerConfig config, PadTokenResolver.PadInfo padInfo) {
        super(handle);

        // Start from whatever the native tokenizer was serialized with.
        maxLength = TokenizersLibrary.LIB.getMaxLength(handle);
        stride = TokenizersLibrary.LIB.getStride(handle);
        padToMultipleOf = TokenizersLibrary.LIB.getPadToMultipleOf(handle);

        if (options == null) {
            addSpecialTokens = true;
            modelMaxLength = 512;
        } else {
            addSpecialTokens = Boolean.parseBoolean(options.getOrDefault("addSpecialTokens", "true"));
            withOverflowingTokens = Boolean.parseBoolean(options.getOrDefault("withOverflowingTokens", "false"));
            modelMaxLength = ArgumentsUtil.intValue(options, "modelMaxLength", 512);
            if (options.containsKey("truncation")) {
                truncation = TruncationStrategy.fromValue(options.get("truncation"));
            }
            if (options.containsKey("padding")) {
                padding = PaddingStrategy.fromValue(options.get("padding"));
            }
            maxLength = ArgumentsUtil.intValue(options, "maxLength", maxLength);
            stride = ArgumentsUtil.intValue(options, "stride", stride);
            padToMultipleOf = ArgumentsUtil.intValue(options, "padToMultipleOf", padToMultipleOf);

            // "true" -> default locale, "false" -> no lower-casing, anything else -> language tag.
            String lowerCase = options.getOrDefault("doLowerCase", "false");
            if (!"false".equals(lowerCase)) {
                doLowerCase = "true".equals(lowerCase) ? Locale.getDefault() : Locale.forLanguageTag(lowerCase);
            }
        }

        if (config != null) {
            applyConfig(config, options);
        }
        updateTruncationAndPadding(padInfo);
    }

    /**
     * Applies values from the tokenizer configuration that the caller did not override.
     *
     * <p>A setting counts as "not overridden" when {@code options} is {@code null} or does not
     * contain the corresponding key. Previously a {@code null} options map caused every
     * option-guarded config value to be skipped, so the config was effectively ignored.
     * {@code cleanupTokenizationSpaces} is always taken from the config.
     *
     * @param config the tokenizer configuration, must not be {@code null}
     * @param options the user supplied options, may be {@code null}
     */
    private void applyConfig(TokenizerConfig config, Map<String, String> options) {
        if (isNotOverridden(options, "modelMaxLength")) {
            this.modelMaxLength = config.getModelMaxLength();
        }
        this.cleanupTokenizationSpaces = config.isCleanUpTokenizationSpaces();
        if (isNotOverridden(options, "addSpecialTokens")) {
            // Special tokens are added only when the config declares at least one non-empty token.
            this.addSpecialTokens = Stream.of(config.getBosToken(), config.getClsToken(), config.getEosToken(), config.getSepToken(), config.getUnkToken(), config.getPadToken()).anyMatch(token -> token != null && !token.isEmpty());
        }
        if (isNotOverridden(options, "stripAccents") && config.hasExplicitStripAccents()) {
            this.stripAccents = config.isStripAccents();
        }
        if (isNotOverridden(options, "addPrefixSpace") && config.hasExplicitAddPrefixSpace()) {
            this.addPrefixSpace = config.isAddPrefixSpace();
        }
    }

    /** Returns true when the caller did not supply {@code key}, including when no options were given. */
    private static boolean isNotOverridden(Map<String, String> options, String key) {
        return options == null || !options.containsKey(key);
    }

    /**
     * Creates a tokenizer from an identifier without any options.
     *
     * @param name the tokenizer identifier
     * @return a new tokenizer
     */
    public static HuggingFaceTokenizer newInstance(String name) {
        Map<String, String> noOptions = null;
        return newInstance(name, noOptions);
    }

    /**
     * Creates a tokenizer from an identifier that is handed to the native {@code createTokenizer}.
     *
     * <p>The access token is looked up via {@code Utils.getEnvOrSystemProperty("HF_TOKEN")} and can
     * be replaced by an {@code hf_token} entry in {@code options}.
     *
     * @param identifier the tokenizer identifier
     * @param options tokenizer options, may be {@code null}
     * @return a new tokenizer
     */
    public static HuggingFaceTokenizer newInstance(String identifier, Map<String, String> options) {
        Ec2Utils.callHome("Huggingface");
        LibUtils.checkStatus();

        String fallbackToken = Utils.getEnvOrSystemProperty("HF_TOKEN");
        String autoToken = options == null ? fallbackToken : options.getOrDefault("hf_token", fallbackToken);

        long handle = TokenizersLibrary.LIB.createTokenizer(identifier, autoToken);
        return new HuggingFaceTokenizer(handle, options, null, null);
    }

    /**
     * Creates a tokenizer from a tokenizer file or a directory containing {@code tokenizer.json},
     * without any options.
     *
     * @param modelPath the tokenizer file or its parent directory
     * @return a new tokenizer
     * @throws IOException if the file cannot be read
     */
    public static HuggingFaceTokenizer newInstance(Path modelPath) throws IOException {
        Map<String, String> noOptions = null;
        return newInstance(modelPath, noOptions);
    }

    /**
     * Creates a tokenizer from a tokenizer file, or from {@code tokenizer.json} inside the given
     * directory.
     *
     * @param modelPath the tokenizer file or its parent directory
     * @param options tokenizer options, may be {@code null}
     * @return a new tokenizer
     * @throws IOException if the file cannot be read
     */
    public static HuggingFaceTokenizer newInstance(Path modelPath, Map<String, String> options) throws IOException {
        Path jsonFile = Files.isDirectory(modelPath) ? modelPath.resolve("tokenizer.json") : modelPath;
        try (InputStream is = Files.newInputStream(jsonFile)) {
            return newInstance(is, options);
        }
    }

    /**
     * Creates a tokenizer from a tokenizer file (or a directory containing {@code tokenizer.json})
     * together with a tokenizer configuration file.
     *
     * @param modelPath the tokenizer file or its parent directory
     * @param configPath path of the configuration file loaded through {@code TokenizerConfig.load}
     * @param options tokenizer options, may be {@code null}
     * @return a new tokenizer
     * @throws IOException if either file cannot be read
     */
    public static HuggingFaceTokenizer newInstance(Path modelPath, String configPath, Map<String, String> options) throws IOException {
        Path jsonFile = Files.isDirectory(modelPath) ? modelPath.resolve("tokenizer.json") : modelPath;
        // The config is loaded before the tokenizer file is opened, as in the original flow.
        TokenizerConfig config = TokenizerConfig.load(Paths.get(configPath));
        try (InputStream is = Files.newInputStream(jsonFile)) {
            return newInstance(is, options, config);
        }
    }

    /**
     * Creates a tokenizer through the native {@code createBpeTokenizer} call from a vocabulary
     * file and a merges file.
     *
     * @param vocab the vocabulary file
     * @param merges the merges file
     * @param options tokenizer options, may be {@code null}
     * @return a new tokenizer
     * @throws IOException declared for API compatibility
     */
    public static HuggingFaceTokenizer newInstance(Path vocab, Path merges, Map<String, String> options) throws IOException {
        Ec2Utils.callHome("Huggingface");
        LibUtils.checkStatus();

        // The native side receives absolute path strings.
        long handle = TokenizersLibrary.LIB.createBpeTokenizer(
                vocab.toAbsolutePath().toString(), merges.toAbsolutePath().toString());
        return new HuggingFaceTokenizer(handle, options, null, null);
    }

    /**
     * Creates a tokenizer from the tokenizer JSON read from a stream.
     *
     * @param is stream supplying the tokenizer JSON; it is read but not closed here
     * @param options tokenizer options, may be {@code null}
     * @return a new tokenizer
     * @throws IOException if the stream cannot be read
     */
    public static HuggingFaceTokenizer newInstance(InputStream is, Map<String, String> options) throws IOException {
        Ec2Utils.callHome("Huggingface");
        LibUtils.checkStatus();

        String tokenizerJson = Utils.toString(is);
        long handle = TokenizersLibrary.LIB.createTokenizerFromString(tokenizerJson);
        return new HuggingFaceTokenizer(handle, options, null, null);
    }

    /**
     * Creates a tokenizer from the tokenizer JSON read from a stream, combined with a tokenizer
     * configuration.
     *
     * <p>Pad token information is extracted from the JSON and the config via
     * {@code PadTokenResolver.extractPadInfo} before the native tokenizer is created.
     *
     * @param is stream supplying the tokenizer JSON; it is read but not closed here
     * @param options tokenizer options, may be {@code null}
     * @param config the tokenizer configuration
     * @return a new tokenizer
     * @throws IOException if the stream cannot be read
     */
    public static HuggingFaceTokenizer newInstance(InputStream is, Map<String, String> options, TokenizerConfig config) throws IOException {
        Ec2Utils.callHome("Huggingface");
        LibUtils.checkStatus();

        String tokenizerJson = Utils.toString(is);
        PadTokenResolver.PadInfo padInfo = PadTokenResolver.extractPadInfo(tokenizerJson, config);
        long handle = TokenizersLibrary.LIB.createTokenizerFromString(tokenizerJson);
        return new HuggingFaceTokenizer(handle, options, config, padInfo);
    }

    /**
     * Returns the version reported by the detected {@code tokenizers} platform.
     *
     * @return the version string
     */
    public String getVersion() {
        return Platform.detectPlatform("tokenizers").getVersion();
    }

    /**
     * Encodes the sentence with the default flags and returns its tokens.
     *
     * @param sentence the text to tokenize
     * @return a fixed-size list view over the encoding's token array
     */
    @Override
    public List<String> tokenize(String sentence) {
        String[] tokens = encode(sentence).getTokens();
        return Arrays.asList(tokens);
    }

    /**
     * Joins tokens with single spaces, glues {@code ##}-prefixed pieces onto the preceding token
     * and trims the result.
     *
     * @param tokens the tokens to join
     * @return the rebuilt sentence
     */
    @Override
    public String buildSentence(List<String> tokens) {
        String spaced = String.join(" ", tokens);
        String merged = spaced.replace(" ##", "");
        return merged.trim();
    }

    /**
     * Releases the native tokenizer. The handle is swapped to {@code null} first, so a second call
     * finds nothing to delete.
     */
    @Override
    public void close() {
        Long pointer = handle.getAndSet(null);
        if (pointer == null) {
            return;
        }
        TokenizersLibrary.LIB.deleteTokenizer(pointer);
    }

    /**
     * Encodes a single text after running it through {@code prepareForTokenization}.
     *
     * @param text the text to encode
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return the encoding
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public Encoding encode(String text, boolean addSpecialTokens, boolean withOverflowingTokens) {
        Objects.requireNonNull(text, "text cannot be null");
        String prepared = prepareForTokenization(text);
        long nativeEncoding = TokenizersLibrary.LIB.encode(getHandle(), prepared, addSpecialTokens);
        return toEncoding(nativeEncoding, withOverflowingTokens);
    }

    /**
     * Encodes a single text using the tokenizer's default flags.
     *
     * @param text the text to encode
     * @return the encoding
     */
    public Encoding encode(String text) {
        return encode(text, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Encodes a text pair.
     *
     * <p>Both texts go through {@code prepareForTokenization}, the same preprocessing the
     * single-text overload applies (prefix space, lower-casing, accent stripping). Previously only
     * lower-casing was applied here, so a pair could be tokenized differently from the same text
     * encoded on its own.
     *
     * @param text the first text
     * @param textPair the second text
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return the encoding
     * @throws NullPointerException if either text is {@code null}
     */
    public Encoding encode(String text, String textPair, boolean addSpecialTokens, boolean withOverflowingTokens) {
        if (text == null || textPair == null) {
            throw new NullPointerException("text/text_pair cannot be null");
        }
        String first = prepareForTokenization(text);
        String second = prepareForTokenization(textPair);
        long encoding = TokenizersLibrary.LIB.encodeDual(getHandle(), first, second, addSpecialTokens);
        return toEncoding(encoding, withOverflowingTokens);
    }

    /**
     * Encodes a text pair using the tokenizer's default flags.
     *
     * @param text the first text
     * @param textPair the second text
     * @return the encoding
     */
    public Encoding encode(String text, String textPair) {
        return encode(text, textPair, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Copies the list into an array and delegates to the array overload.
     *
     * @param inputs the input texts
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return the encoding
     */
    public Encoding encode(List<String> inputs, boolean addSpecialTokens, boolean withOverflowingTokens) {
        return encode(inputs.toArray(Utils.EMPTY_ARRAY), addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Encodes a list of texts into one encoding using the tokenizer's default flags.
     *
     * @param inputs the input texts
     * @return the encoding
     */
    public Encoding encode(List<String> inputs) {
        return encode(inputs, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Passes an array of texts to the native {@code encodeList} call and returns one encoding.
     *
     * <p>Lower-casing, when enabled, is done on a private copy. The previous implementation wrote
     * the lower-cased strings back into the caller's array and skipped the null check whenever
     * lower-casing was on.
     *
     * <p>NOTE(review): unlike {@code encode(String)}, prefix-space and accent handling are not
     * applied here; confirm whether that is intended for this input form.
     *
     * @param inputs the input texts; not modified
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return the encoding
     * @throws NullPointerException if any element is {@code null}
     */
    public Encoding encode(String[] inputs, boolean addSpecialTokens, boolean withOverflowingTokens) {
        String[] prepared = new String[inputs.length];
        for (int i = 0; i < inputs.length; ++i) {
            String input = inputs[i];
            if (input == null) {
                throw new NullPointerException("input text cannot be null");
            }
            prepared[i] = this.doLowerCase != null ? input.toLowerCase(this.doLowerCase) : input;
        }
        long encoding = TokenizersLibrary.LIB.encodeList(getHandle(), prepared, addSpecialTokens);
        return toEncoding(encoding, withOverflowingTokens);
    }

    /**
     * Encodes an array of texts into one encoding using the tokenizer's default flags.
     *
     * @param inputs the input texts
     * @return the encoding
     */
    public Encoding encode(String[] inputs) {
        return encode(inputs, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Copies the list into an array and delegates to the array overload.
     *
     * @param inputs the batch of texts
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return one encoding per native result
     */
    public Encoding[] batchEncode(List<String> inputs, boolean addSpecialTokens, boolean withOverflowingTokens) {
        return batchEncode(inputs.toArray(Utils.EMPTY_ARRAY), addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Encodes a batch of texts using the tokenizer's default flags.
     *
     * @param inputs the batch of texts
     * @return one encoding per native result
     */
    public Encoding[] batchEncode(List<String> inputs) {
        return batchEncode(inputs, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Encodes a batch of texts.
     *
     * <p>Each text goes through {@code prepareForTokenization}, so a batch element is preprocessed
     * the same way as {@code encode(String)} would preprocess it. The prepared strings are kept in
     * a private copy: the previous implementation lower-cased the caller's array in place and
     * skipped the null check whenever lower-casing was on.
     *
     * @param inputs the batch of texts; not modified
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return one encoding per native result
     * @throws NullPointerException if any element is {@code null}
     */
    public Encoding[] batchEncode(String[] inputs, boolean addSpecialTokens, boolean withOverflowingTokens) {
        String[] prepared = new String[inputs.length];
        for (int i = 0; i < inputs.length; ++i) {
            if (inputs[i] == null) {
                throw new NullPointerException("input text cannot be null");
            }
            prepared[i] = prepareForTokenization(inputs[i]);
        }
        long[] encodings = TokenizersLibrary.LIB.batchEncode(getHandle(), prepared, addSpecialTokens);
        Encoding[] ret = new Encoding[encodings.length];
        for (int i = 0; i < encodings.length; ++i) {
            ret[i] = toEncoding(encodings[i], withOverflowingTokens);
        }
        return ret;
    }

    /**
     * Encodes a batch of texts using the tokenizer's default flags.
     *
     * @param inputs the batch of texts
     * @return one encoding per native result
     */
    public Encoding[] batchEncode(String[] inputs) {
        return batchEncode(inputs, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Encodes a batch of text pairs.
     *
     * <p>Keys and values are null-checked unconditionally (the check used to be skipped when
     * lower-casing was enabled) and then run through {@code prepareForTokenization}, matching the
     * preprocessing of {@code encode(String)}.
     *
     * @param inputs the batch of text pairs (key = first text, value = second text)
     * @param addSpecialTokens forwarded to the native encode call
     * @param withOverflowingTokens whether overflowing encodings are converted as well
     * @return one encoding per native result
     * @throws NullPointerException if any key or value is {@code null}
     */
    public Encoding[] batchEncode(PairList<String, String> inputs, boolean addSpecialTokens, boolean withOverflowingTokens) {
        String[] rawText = inputs.keyArray(Utils.EMPTY_ARRAY);
        String[] rawTextPair = inputs.valueArray(Utils.EMPTY_ARRAY);
        // Validate everything up front so the error message does not depend on the settings.
        for (String key : rawText) {
            if (key == null) {
                throw new NullPointerException("text pair key cannot be null");
            }
        }
        for (String value : rawTextPair) {
            if (value == null) {
                throw new NullPointerException("text pair value cannot be null");
            }
        }
        String[] text = new String[rawText.length];
        for (int i = 0; i < rawText.length; ++i) {
            text[i] = prepareForTokenization(rawText[i]);
        }
        String[] textPair = new String[rawTextPair.length];
        for (int i = 0; i < rawTextPair.length; ++i) {
            textPair[i] = prepareForTokenization(rawTextPair[i]);
        }
        long[] encodings = TokenizersLibrary.LIB.batchEncodePair(getHandle(), text, textPair, addSpecialTokens);
        Encoding[] ret = new Encoding[encodings.length];
        for (int i = 0; i < encodings.length; ++i) {
            ret[i] = toEncoding(encodings[i], withOverflowingTokens);
        }
        return ret;
    }

    /**
     * Encodes a batch of text pairs using the tokenizer's default flags.
     *
     * @param inputs the batch of text pairs
     * @return one encoding per native result
     */
    public Encoding[] batchEncode(PairList<String, String> inputs) {
        return batchEncode(inputs, addSpecialTokens, withOverflowingTokens);
    }

    /**
     * Decodes token ids into text, applying {@code cleanUpTokenization} when
     * {@code cleanupTokenizationSpaces} is set.
     *
     * @param ids the token ids
     * @param skipSpecialTokens forwarded to the native decode call
     * @return the decoded text
     */
    public String decode(long[] ids, boolean skipSpecialTokens) {
        String decoded = TokenizersLibrary.LIB.decode(getHandle(), ids, skipSpecialTokens);
        if (!cleanupTokenizationSpaces) {
            return decoded;
        }
        return cleanUpTokenization(decoded);
    }

    /**
     * Decodes token ids into text. Special tokens are skipped exactly when the tokenizer's
     * {@code addSpecialTokens} default is {@code false}.
     *
     * @param ids the token ids
     * @return the decoded text
     */
    public String decode(long[] ids) {
        boolean skipSpecialTokens = !addSpecialTokens;
        return decode(ids, skipSpecialTokens);
    }

    /**
     * Decodes a batch of token id sequences into texts.
     *
     * <p>When {@code cleanupTokenizationSpaces} is set, every decoded text is post-processed with
     * {@code cleanUpTokenization}, the same as {@code decode(long[], boolean)} does. Previously
     * the batch variant returned the raw native output and so disagreed with single decoding.
     *
     * @param batchIds the batch of token id sequences
     * @param skipSpecialTokens forwarded to the native decode call
     * @return the decoded texts
     */
    public String[] batchDecode(long[][] batchIds, boolean skipSpecialTokens) {
        String[] decoded = TokenizersLibrary.LIB.batchDecode(getHandle(), batchIds, skipSpecialTokens);
        if (this.cleanupTokenizationSpaces && decoded != null) {
            for (int i = 0; i < decoded.length; ++i) {
                if (decoded[i] != null) {
                    decoded[i] = cleanUpTokenization(decoded[i]);
                }
            }
        }
        return decoded;
    }

    /**
     * Decodes a batch of token id sequences. Special tokens are skipped exactly when the
     * tokenizer's {@code addSpecialTokens} default is {@code false}.
     *
     * @param batchIds the batch of token id sequences
     * @return the decoded texts
     */
    public String[] batchDecode(long[][] batchIds) {
        boolean skipSpecialTokens = !addSpecialTokens;
        return batchDecode(batchIds, skipSpecialTokens);
    }

    /**
     * Returns the enum constant name of the active truncation strategy.
     *
     * @return the truncation strategy name
     */
    public String getTruncation() {
        return truncation.name();
    }

    /**
     * Returns the enum constant name of the active padding strategy.
     *
     * @return the padding strategy name
     */
    public String getPadding() {
        return padding.name();
    }

    /**
     * Returns the effective maximum length.
     *
     * @return the maximum length
     */
    public int getMaxLength() {
        return maxLength;
    }

    /**
     * Returns the stride used for truncation.
     *
     * @return the stride
     */
    public int getStride() {
        return stride;
    }

    /**
     * Returns the padding multiple.
     *
     * @return the padding multiple
     */
    public int getPadToMultipleOf() {
        return padToMultipleOf;
    }

    /**
     * Creates a new, empty builder.
     *
     * @return a new builder
     */
    public static Builder builder() {
        Builder fresh = new Builder();
        return fresh;
    }

    /**
     * Creates a builder pre-populated from an arguments map.
     *
     * @param arguments option names mapped to values; each value is stored via {@code toString()}
     * @return the configured builder
     */
    public static Builder builder(Map<String, ?> arguments) {
        Builder configured = new Builder();
        configured.configure(arguments);
        return configured;
    }

    /**
     * Applies the configured text preprocessing in a fixed order: optional prefix space, optional
     * lower-casing, optional accent stripping.
     *
     * @param text the raw text, must not be {@code null}
     * @return the preprocessed text
     */
    private String prepareForTokenization(String text) {
        String prepared = (addPrefixSpace && !text.startsWith(" ")) ? " " + text : text;
        if (doLowerCase != null) {
            prepared = prepared.toLowerCase(doLowerCase);
        }
        if (!stripAccents) {
            return prepared;
        }
        // Decompose first so that accents become separate combining marks, then drop the marks.
        String decomposed = Normalizer.normalize(prepared, Normalizer.Form.NFKD);
        return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    }

    /**
     * Resolves the effective {@code maxLength} and pushes truncation and padding settings to the
     * native tokenizer.
     *
     * <p>When truncating or padding to max length, {@code maxLength} is replaced by
     * {@code modelMaxLength} if it is {@code -1} or larger than {@code modelMaxLength}. When both
     * truncating and padding to max length with a non-zero {@code padToMultipleOf},
     * {@code maxLength} is rounded up to the next multiple, or down if rounding up would exceed
     * {@code modelMaxLength}.
     *
     * @param padInfo pad token information forwarded to {@code updatePadding}, may be {@code null}
     */
    private void updateTruncationAndPadding(PadTokenResolver.PadInfo padInfo) {
        boolean truncating = truncation != TruncationStrategy.DO_NOT_TRUNCATE;
        boolean padToMax = padding == PaddingStrategy.MAX_LENGTH;

        if (padToMax || truncating) {
            if (maxLength == -1) {
                logger.warn("maxLength is not explicitly specified, use modelMaxLength: {}", modelMaxLength);
                maxLength = modelMaxLength;
            } else if (maxLength > modelMaxLength) {
                logger.warn("maxLength is greater then modelMaxLength, change to: {}", modelMaxLength);
                maxLength = modelMaxLength;
            }

            boolean misaligned = padToMax && truncating && padToMultipleOf != 0 && maxLength % padToMultipleOf != 0;
            if (misaligned) {
                int aligned = maxLength + padToMultipleOf - maxLength % padToMultipleOf;
                if (aligned > modelMaxLength) {
                    // Rounding up would overshoot the model limit, so fall back one multiple.
                    aligned -= padToMultipleOf;
                }
                logger.warn("maxLength ({}) is not a multiple of padToMultipleOf ({}), change to: {}", maxLength, padToMultipleOf, aligned);
                maxLength = aligned;
            }
        }

        if (truncating) {
            TokenizersLibrary.LIB.setTruncation(getHandle(), maxLength, truncation.name(), stride);
        } else {
            TokenizersLibrary.LIB.disableTruncation(getHandle());
        }
        updatePadding(padInfo);
    }

    /**
     * Configures padding on the native tokenizer: disabled for {@code DO_NOT_PAD}, otherwise set
     * with the explicit pad token and id when {@code padInfo} is available, or without them when
     * it is not.
     *
     * @param padInfo pad token information, may be {@code null}
     */
    private void updatePadding(PadTokenResolver.PadInfo padInfo) {
        if (padding == PaddingStrategy.DO_NOT_PAD) {
            TokenizersLibrary.LIB.disablePadding(getHandle());
        } else if (padInfo == null) {
            TokenizersLibrary.LIB.setPadding(getHandle(), maxLength, padding.name(), padToMultipleOf);
        } else {
            TokenizersLibrary.LIB.setPaddingWithTokenAndId(getHandle(), maxLength, padding.name(), padInfo.getPadToken(), padInfo.getPadId(), padToMultipleOf);
        }
    }

    /**
     * Copies a native encoding into a Java {@link Encoding} and releases the native object.
     *
     * <p>The native encoding is deleted in a {@code finally} block, so it is released even when one
     * of the accessor calls or the conversion of an overflowing encoding throws. Previously the
     * handle was only deleted on the success path.
     *
     * @param encoding handle of the native encoding; invalid after this call
     * @param withOverflowingTokens whether overflowing encodings are converted (recursively) too
     * @return the Java-side encoding
     */
    private Encoding toEncoding(long encoding, boolean withOverflowingTokens) {
        try {
            long[] ids = TokenizersLibrary.LIB.getTokenIds(encoding);
            long[] typeIds = TokenizersLibrary.LIB.getTypeIds(encoding);
            String[] tokens = TokenizersLibrary.LIB.getTokens(encoding);
            long[] wordIds = TokenizersLibrary.LIB.getWordIds(encoding);
            long[] sequenceIds = TokenizersLibrary.LIB.getSequenceIds(encoding);
            long[] attentionMask = TokenizersLibrary.LIB.getAttentionMask(encoding);
            long[] specialTokenMask = TokenizersLibrary.LIB.getSpecialTokenMask(encoding);
            CharSpan[] charSpans = TokenizersLibrary.LIB.getTokenCharSpans(encoding);
            boolean exceedMaxLength = TokenizersLibrary.LIB.getOverflowCount(encoding) > 0;

            Encoding[] overflowing;
            if (withOverflowingTokens) {
                long[] overflowingHandles = TokenizersLibrary.LIB.getOverflowing(encoding);
                overflowing = new Encoding[overflowingHandles.length];
                for (int i = 0; i < overflowingHandles.length; ++i) {
                    // Each overflowing handle is released by its own toEncoding call.
                    overflowing[i] = toEncoding(overflowingHandles[i], true);
                }
            } else {
                overflowing = new Encoding[0];
            }
            return new Encoding(ids, typeIds, tokens, wordIds, sequenceIds, attentionMask, specialTokenMask, charSpans, exceedMaxLength, overflowing);
        } finally {
            TokenizersLibrary.LIB.deleteEncoding(encoding);
        }
    }

    /**
     * Removes the space that precedes common punctuation and English contractions.
     *
     * @param text the decoded text
     * @return the text with the listed replacements applied in order
     */
    private String cleanUpTokenization(String text) {
        // {pattern, replacement} pairs, applied sequentially in this exact order.
        String[][] rules = {
            {" .", "."},
            {" ?", "?"},
            {" !", "!"},
            {" ,", ","},
            {" ' ", "'"},
            {" n't", "n't"},
            {" 'm", "'m"},
            {" 's", "'s"},
            {" 've", "'ve"},
            {" 're", "'re"},
        };
        String cleaned = text;
        for (String[] rule : rules) {
            cleaned = cleaned.replace(rule[0], rule[1]);
        }
        return cleaned;
    }

    /**
     * Last-resort release of the native tokenizer if {@link #close()} was never called.
     *
     * <p>{@code super.finalize()} runs in a {@code finally} block so the superclass finalizer is
     * not skipped when {@code close()} throws.
     *
     * @throws Throwable whatever {@code close()} or the superclass finalizer throws
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            this.close();
        } finally {
            super.finalize();
        }
    }

    /** Truncation strategies; the constant names are what gets passed to the native library. */
    private static enum TruncationStrategy {
        LONGEST_FIRST,
        ONLY_FIRST,
        ONLY_SECOND,
        DO_NOT_TRUNCATE;

        /**
         * Parses an option value: {@code "true"} maps to {@code LONGEST_FIRST}, {@code "false"} to
         * {@code DO_NOT_TRUNCATE}, anything else is matched case-insensitively against the
         * constant names.
         *
         * @param value the option value
         * @return the matching strategy
         * @throws IllegalArgumentException if nothing matches
         */
        static TruncationStrategy fromValue(String value) {
            if ("true".equals(value)) {
                return LONGEST_FIRST;
            }
            if ("false".equals(value)) {
                return DO_NOT_TRUNCATE;
            }
            return Arrays.stream(values())
                    .filter(candidate -> candidate.name().equalsIgnoreCase(value))
                    .findFirst()
                    .orElseThrow(() -> new IllegalArgumentException("Invalid TruncationStrategy: " + value));
        }
    }

    /** Padding strategies; the constant names are what gets passed to the native library. */
    private static enum PaddingStrategy {
        LONGEST,
        MAX_LENGTH,
        DO_NOT_PAD;

        /**
         * Parses an option value: {@code "true"} maps to {@code LONGEST}, {@code "false"} to
         * {@code DO_NOT_PAD}, anything else is matched case-insensitively against the constant
         * names.
         *
         * @param value the option value
         * @return the matching strategy
         * @throws IllegalArgumentException if nothing matches
         */
        static PaddingStrategy fromValue(String value) {
            if ("true".equals(value)) {
                return LONGEST;
            }
            if ("false".equals(value)) {
                return DO_NOT_PAD;
            }
            return Arrays.stream(values())
                    .filter(candidate -> candidate.name().equalsIgnoreCase(value))
                    .findFirst()
                    .orElseThrow(() -> new IllegalArgumentException("Invalid PaddingStrategy: " + value));
        }
    }

    /**
     * Fluent builder that collects string-valued options and creates a
     * {@link HuggingFaceTokenizer} from a tokenizer name or a tokenizer path.
     */
    public static final class Builder {
        /** Optional manager the built tokenizer gets attached to. */
        private NDManager manager;
        /** Collected options; handed unchanged to the tokenizer factory methods. */
        private final Map<String, String> options = new ConcurrentHashMap<>();

        /** Creates a builder with {@code addSpecialTokens} preset to {@code "true"}. */
        Builder() {
            options.put("addSpecialTokens", "true");
        }

        /** Stores one option and returns this builder for chaining. */
        private Builder with(String key, String value) {
            options.put(key, value);
            return this;
        }

        /**
         * Sets the manager the built tokenizer is attached to.
         *
         * @param manager the manager, or {@code null} for none
         * @return this builder
         */
        public Builder optManager(NDManager manager) {
            this.manager = manager;
            return this;
        }

        /**
         * Sets the tokenizer name; when present, {@link #build()} ignores the tokenizer path.
         *
         * @param tokenizerName the tokenizer name
         * @return this builder
         */
        public Builder optTokenizerName(String tokenizerName) {
            return with("tokenizer", tokenizerName);
        }

        /**
         * Sets the tokenizer path unless one has already been stored.
         *
         * @param tokenizerPath the tokenizer file or directory
         * @return this builder
         */
        public Builder optTokenizerPath(Path tokenizerPath) {
            options.putIfAbsent("tokenizerPath", tokenizerPath.toString());
            return this;
        }

        /**
         * Sets the {@code addSpecialTokens} option.
         *
         * @param addSpecialTokens the flag value
         * @return this builder
         */
        public Builder optAddSpecialTokens(boolean addSpecialTokens) {
            return with("addSpecialTokens", String.valueOf(addSpecialTokens));
        }

        /**
         * Sets the {@code withOverflowingTokens} option.
         *
         * @param withOverflowingTokens the flag value
         * @return this builder
         */
        public Builder optWithOverflowingTokens(boolean withOverflowingTokens) {
            return with("withOverflowingTokens", String.valueOf(withOverflowingTokens));
        }

        /**
         * Enables or disables truncation by storing {@code "true"} or {@code "false"}.
         *
         * @param enabled whether truncation is enabled
         * @return this builder
         */
        public Builder optTruncation(boolean enabled) {
            return with("truncation", String.valueOf(enabled));
        }

        /**
         * Selects the {@code ONLY_FIRST} truncation strategy.
         *
         * @return this builder
         */
        public Builder optTruncateFirstOnly() {
            return with("truncation", TruncationStrategy.ONLY_FIRST.name());
        }

        /**
         * Selects the {@code ONLY_SECOND} truncation strategy.
         *
         * @return this builder
         */
        public Builder optTruncateSecondOnly() {
            return with("truncation", TruncationStrategy.ONLY_SECOND.name());
        }

        /**
         * Enables or disables padding by storing {@code "true"} or {@code "false"}.
         *
         * @param enabled whether padding is enabled
         * @return this builder
         */
        public Builder optPadding(boolean enabled) {
            return with("padding", String.valueOf(enabled));
        }

        /**
         * Selects the {@code MAX_LENGTH} padding strategy.
         *
         * @return this builder
         */
        public Builder optPadToMaxLength() {
            return with("padding", PaddingStrategy.MAX_LENGTH.name());
        }

        /**
         * Sets the {@code maxLength} option.
         *
         * @param maxLength the maximum length
         * @return this builder
         */
        public Builder optMaxLength(int maxLength) {
            return with("maxLength", String.valueOf(maxLength));
        }

        /**
         * Sets the {@code padToMultipleOf} option.
         *
         * @param padToMultipleOf the padding multiple
         * @return this builder
         */
        public Builder optPadToMultipleOf(int padToMultipleOf) {
            return with("padToMultipleOf", String.valueOf(padToMultipleOf));
        }

        /**
         * Sets the {@code stride} option.
         *
         * @param stride the stride
         * @return this builder
         */
        public Builder optStride(int stride) {
            return with("stride", String.valueOf(stride));
        }

        /**
         * Sets the {@code doLowerCase} option to {@code "true"} or {@code "false"}.
         *
         * @param doLowerCase whether input is lower-cased
         * @return this builder
         */
        public Builder optDoLowerCase(boolean doLowerCase) {
            return with("doLowerCase", String.valueOf(doLowerCase));
        }

        /**
         * Sets the {@code doLowerCase} option to a locale string.
         *
         * @param locale the locale value stored verbatim
         * @return this builder
         */
        public Builder optDoLowerCase(String locale) {
            return with("doLowerCase", locale);
        }

        /**
         * Sets the path of the tokenizer configuration file.
         *
         * @param configPath the configuration file path
         * @return this builder
         */
        public Builder optTokenizerConfigPath(String configPath) {
            return with("tokenizerConfigPath", configPath);
        }

        /**
         * Copies every entry of {@code arguments} into the options, converting values with
         * {@code toString()}.
         *
         * @param arguments option names mapped to values
         */
        public void configure(Map<String, ?> arguments) {
            for (Map.Entry<String, ?> argument : arguments.entrySet()) {
                String key = argument.getKey();
                String value = argument.getValue().toString();
                options.put(key, value);
            }
        }

        /** Attaches the tokenizer to the manager, if one was set, and returns the tokenizer. */
        private HuggingFaceTokenizer managed(HuggingFaceTokenizer tokenizer) {
            if (manager == null) {
                return tokenizer;
            }
            manager.attachInternal(tokenizer.getUid(), new AutoCloseable[] {tokenizer});
            return tokenizer;
        }

        /**
         * Builds the tokenizer from the tokenizer name if set, otherwise from the tokenizer path.
         *
         * @return the new tokenizer
         * @throws IllegalArgumentException if neither a name nor a path was provided
         * @throws IOException if the tokenizer path or config path does not exist, or reading fails
         */
        public HuggingFaceTokenizer build() throws IOException {
            String name = options.get("tokenizer");
            if (name != null) {
                return managed(HuggingFaceTokenizer.newInstance(name, options));
            }

            String location = options.get("tokenizerPath");
            if (location == null) {
                throw new IllegalArgumentException("Missing tokenizer path.");
            }
            Path tokenizerPath = Paths.get(location);
            if (!Files.exists(tokenizerPath)) {
                throw new IOException("Tokenizer file not exists: " + tokenizerPath);
            }

            String configPath = options.get("tokenizerConfigPath");
            validateConfigPath(configPath);
            HuggingFaceTokenizer tokenizer = buildTokenizer(tokenizerPath, configPath, options);
            return managed(tokenizer);
        }

        /** Fails with an {@link IOException} when a config path is given but does not exist. */
        private void validateConfigPath(String configPath) throws IOException {
            if (configPath == null) {
                return;
            }
            if (!Files.exists(Paths.get(configPath))) {
                throw new IOException("Tokenizer config file not exists: " + configPath);
            }
        }

        /**
         * Picks the factory matching what is found at {@code tokenizerPath}: a plain file or a
         * directory holding {@code tokenizer.json} (optionally with a config), or a directory
         * holding {@code vocab.json} and {@code merges.txt}.
         */
        private HuggingFaceTokenizer buildTokenizer(Path tokenizerPath, String configPath, Map<String, String> options) throws IOException {
            boolean plainFile = !Files.isDirectory(tokenizerPath);
            if (plainFile || Files.exists(tokenizerPath.resolve("tokenizer.json"))) {
                // The factories resolve tokenizer.json themselves when given a directory.
                if (configPath == null) {
                    return HuggingFaceTokenizer.newInstance(tokenizerPath, options);
                }
                return HuggingFaceTokenizer.newInstance(tokenizerPath, configPath, options);
            }

            Path vocab = tokenizerPath.resolve("vocab.json");
            Path merges = tokenizerPath.resolve("merges.txt");
            if (!Files.exists(vocab) || !Files.exists(merges)) {
                throw new IOException("tokenizer.json file not found.");
            }
            if (configPath != null) {
                logger.warn("Config file is not supported for BPE tokenizers, ignoring config path: {}", configPath);
            }
            return HuggingFaceTokenizer.newInstance(vocab, merges, options);
        }
    }
}

