/*
 * Decompiled with CFR 0.152.
 */
package ai.vespa.embedding.huggingface;

import ai.vespa.embedding.PoolingStrategy;
import ai.vespa.modelintegration.evaluator.OnnxEvaluator;
import ai.vespa.modelintegration.evaluator.OnnxEvaluatorOptions;
import ai.vespa.modelintegration.evaluator.OnnxRuntime;
import com.yahoo.api.annotations.Beta;
import com.yahoo.component.AbstractComponent;
import com.yahoo.component.annotation.Inject;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
import com.yahoo.language.huggingface.Encoding;
import com.yahoo.language.huggingface.HuggingFaceTokenizer;
import com.yahoo.language.huggingface.ModelInfo;
import com.yahoo.language.process.Embedder;
import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * Embedder that tokenizes text with a {@link HuggingFaceTokenizer}, evaluates a transformer
 * model through an {@link OnnxEvaluator}, and pools the resulting token embeddings into a
 * single one-dimensional indexed tensor.
 *
 * <p>When the requested tensor type has {@code int8} cells, the pooled embedding is
 * binarized and packed eight values per cell (see {@link #binarize}). Model outputs are
 * cached per {@link Embedder.Context} through {@code computeCachedValueIfAbsent}, keyed on
 * embedder id and (instruction-prefixed) text.
 *
 * <p>The tokenizer and the evaluator are closed in {@link #deconstruct()}.
 */
@Beta
public class HuggingFaceEmbedder
extends AbstractComponent
implements Embedder {
    private static final Logger log = Logger.getLogger(HuggingFaceEmbedder.class.getName());
    private final Embedder.Runtime runtime;
    private final String inputIdsName;
    private final String attentionMaskName;
    private final String tokenTypeIdsName;
    private final String outputName;
    private final boolean normalize;
    private final HuggingFaceTokenizer tokenizer;
    private final OnnxEvaluator evaluator;
    private final PoolingStrategy poolingStrategy;
    private final String prependQuery;
    private final String prependDocument;

    /**
     * Builds the tokenizer and the ONNX evaluator from configuration and verifies that the
     * model exposes the configured input and output names.
     *
     * <p>If anything fails after the tokenizer has been built, the tokenizer (and the
     * evaluator, if it was already created) is closed before the failure is rethrown, so a
     * failed construction does not leave these resources open.
     *
     * @param onnx    factory used to create the model evaluator
     * @param runtime sink for sequence-length and latency samples
     * @param config  embedder configuration
     * @throws IllegalArgumentException if the model lacks one of the configured inputs/outputs
     */
    @Inject
    public HuggingFaceEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, HuggingFaceEmbedderConfig config) {
        this.runtime = runtime;
        this.inputIdsName = config.transformerInputIds();
        this.attentionMaskName = config.transformerAttentionMask();
        this.tokenTypeIdsName = config.transformerTokenTypeIds();
        this.outputName = config.transformerOutput();
        this.normalize = config.normalize();
        this.prependQuery = config.prependQuery();
        this.prependDocument = config.prependDocument();

        Path tokenizerPath = Paths.get(config.tokenizerPath().toString());
        HuggingFaceTokenizer.Builder builder = new HuggingFaceTokenizer.Builder()
                .addSpecialTokens(true)
                .addDefaultModel(tokenizerPath)
                .setPadding(false);
        ModelInfo info = HuggingFaceTokenizer.getModelInfo(tokenizerPath);
        log.fine(() -> "'%s' has info '%s'".formatted(tokenizerPath, info));
        if (info.maxLength() == -1 || info.truncation() != ModelInfo.TruncationStrategy.LONGEST_FIRST) {
            // The tokenizer reports no max length, or a truncation strategy other than
            // LONGEST_FIRST: force truncation. Use the tokenizer's own max length only when it
            // is positive and does not exceed the configured maximum.
            int maxLength = info.maxLength() > 0 && info.maxLength() <= config.transformerMaxTokens()
                    ? info.maxLength()
                    : config.transformerMaxTokens();
            builder.setTruncation(true).setMaxLength(maxLength);
        }
        this.tokenizer = builder.build();

        // From here on the tokenizer is live: any failure must release it (and the evaluator).
        OnnxEvaluator createdEvaluator = null;
        try {
            this.poolingStrategy = PoolingStrategy.fromString(config.poolingStrategy().toString());
            OnnxEvaluatorOptions onnxOpts = new OnnxEvaluatorOptions();
            if (config.transformerGpuDevice() >= 0) {
                onnxOpts.setGpuDevice(config.transformerGpuDevice());
            }
            onnxOpts.setExecutionMode(config.transformerExecutionMode().toString());
            onnxOpts.setThreads(config.transformerInterOpThreads(), config.transformerIntraOpThreads());
            createdEvaluator = onnx.evaluatorOf(config.transformerModel().toString(), onnxOpts);
            this.evaluator = createdEvaluator;
            validateModel();
        } catch (RuntimeException | Error e) {
            closeAfterFailure(e, createdEvaluator, this.tokenizer);
            throw e;
        }
    }

    /**
     * Closes the resources created by a failed construction, attaching any close failure to
     * the original one as a suppressed exception instead of masking it.
     */
    private static void closeAfterFailure(Throwable failure, OnnxEvaluator evaluator, HuggingFaceTokenizer tokenizer) {
        if (evaluator != null) {
            try {
                evaluator.close();
            } catch (RuntimeException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }
        try {
            tokenizer.close();
        } catch (RuntimeException closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }

    /**
     * Checks that the model declares the configured input ids, attention mask and output
     * names, and the token type ids name when one is configured (non-empty).
     */
    private void validateModel() {
        Map<String, TensorType> inputs = evaluator.getInputInfo();
        validateName(inputs, inputIdsName, "input");
        validateName(inputs, attentionMaskName, "input");
        if (!tokenTypeIdsName.isEmpty()) {
            validateName(inputs, tokenTypeIdsName, "input");
        }
        Map<String, TensorType> outputs = evaluator.getOutputInfo();
        validateName(outputs, outputName, "output");
    }

    /**
     * @throws IllegalArgumentException if {@code name} is not a key of {@code types}; the
     *         message lists the names that are present
     */
    private static void validateName(Map<String, TensorType> types, String name, String type) {
        if (!types.containsKey(name)) {
            throw new IllegalArgumentException("Model does not contain required " + type + ": '" + name + "'. Model contains: " + String.join(",", types.keySet()));
        }
    }

    /**
     * Tokenizes {@code s} by delegating to the tokenizer, and reports the token count and the
     * elapsed time in milliseconds to the runtime.
     *
     * @param s       the text to tokenize
     * @param context the embedding context, passed to the tokenizer and the runtime samplers
     * @return the token ids produced by the tokenizer
     */
    public List<Integer> embed(String s, Embedder.Context context) {
        long start = System.nanoTime();
        List<Integer> tokens = tokenizer.embed(s, context);
        runtime.sampleSequenceLength(tokens.size(), context);
        runtime.sampleEmbeddingLatency((double)(System.nanoTime() - start) / 1000000.0, context);
        return tokens;
    }

    /**
     * Closes the evaluator and the tokenizer. The tokenizer is closed even if closing the
     * evaluator throws.
     */
    public void deconstruct() {
        try {
            evaluator.close();
        } finally {
            tokenizer.close();
        }
    }

    /**
     * Embeds {@code text} into a tensor of the given type.
     *
     * <p>For {@code int8} target types the embedding is pooled, optionally normalized, and
     * bit-packed; otherwise it is pooled and, if configured, L2-normalized.
     *
     * @param text       the text to embed; an instruction prefix may be added, see
     *                   {@link #prependInstruction}
     * @param context    the embedding context (language, destination, embedder id, cache)
     * @param tensorType the target type; must have exactly one indexed dimension
     * @return the sentence embedding
     * @throws IllegalArgumentException if {@code tensorType} does not have exactly one
     *         indexed dimension
     */
    public Tensor embed(String text, Embedder.Context context, TensorType tensorType) {
        if (tensorType.dimensions().size() != 1) {
            throw new IllegalArgumentException("Error in embedding to type '" + tensorType + "': should only have one dimension.");
        }
        if (!tensorType.dimensions().get(0).isIndexed()) {
            throw new IllegalArgumentException("Error in embedding to type '" + tensorType + "': dimension should be indexed.");
        }
        HFEmbeddingResult embeddingResult = lookupOrEvaluate(context, prependInstruction(text, context));
        if (tensorType.valueType() == TensorType.Value.INT8) {
            return binaryQuantization(embeddingResult, tensorType);
        }
        Tensor result = poolingStrategy.toSentenceEmbedding(tensorType, embeddingResult.output(), embeddingResult.attentionMask());
        return normalize ? normalize(result, tensorType) : result;
    }

    /**
     * Returns {@code text} prefixed with the configured instruction and a single space.
     *
     * <p>The query instruction is used when it is non-empty and the context destination
     * starts with {@code "query"}; otherwise the document instruction is used when it is
     * non-empty; otherwise the text is returned unchanged.
     *
     * <p>NOTE(review): a query destination with no query instruction configured falls
     * through to the document instruction. Behaviour is preserved here; confirm whether that
     * fallback is intended.
     */
    String prependInstruction(String text, Embedder.Context context) {
        if (prependQuery != null && !prependQuery.isEmpty() && context.getDestination().startsWith("query")) {
            return prependQuery + " " + text;
        }
        if (prependDocument != null && !prependDocument.isEmpty()) {
            return prependDocument + " " + text;
        }
        return text;
    }

    /**
     * Scales {@code embedding} to unit Euclidean length over the single dimension of
     * {@code tensorType}.
     *
     * <p>If the magnitude is zero the cells are copied unscaled, so an all-zero embedding
     * stays all-zero instead of becoming NaN.
     *
     * @throws IllegalArgumentException if the dimension of {@code tensorType} has no size
     */
    Tensor normalize(Tensor embedding, TensorType tensorType) {
        long size = boundSize(tensorType);
        double sumOfSquares = 0.0;
        for (int i = 0; i < size; i++) {
            double item = embedding.get(TensorAddress.of(i));
            sumOfSquares += item * item;
        }
        double magnitude = Math.sqrt(sumOfSquares);
        Tensor.Builder builder = Tensor.Builder.of(tensorType);
        for (int i = 0; i < size; i++) {
            double value = embedding.get(TensorAddress.of(i));
            // A zero magnitude means every cell is zero; dividing would produce NaN.
            builder.cell(magnitude == 0.0 ? value : value / magnitude, i);
        }
        return builder.build();
    }

    /**
     * Returns the size of the first dimension of {@code tensorType}.
     *
     * @throws IllegalArgumentException if that dimension does not declare a size
     */
    private static long boundSize(TensorType tensorType) {
        return tensorType.dimensions().get(0).size().orElseThrow(() ->
                new IllegalArgumentException("Error in embedding to type '" + tensorType + "': dimension should be bound."));
    }

    /** Returns the cached model output for this embedder id and text, evaluating the model on a cache miss. */
    private HFEmbeddingResult lookupOrEvaluate(Embedder.Context context, String text) {
        HFEmbedderCacheKey key = new HFEmbedderCacheKey(context.getEmbedderId(), text);
        return (HFEmbeddingResult)context.computeCachedValueIfAbsent(key, () -> evaluate(context, text));
    }

    /**
     * Encodes {@code text}, runs the model, and returns the raw token embeddings together
     * with the attention mask. Samples the sequence length and the total latency in
     * milliseconds to the runtime.
     *
     * @throws IllegalArgumentException if the model output does not have three dimensions
     */
    private HFEmbeddingResult evaluate(Embedder.Context context, String text) {
        long start = System.nanoTime();
        Encoding encoding = tokenizer.encode(text, context.getLanguage());
        runtime.sampleSequenceLength(encoding.ids().size(), context);
        IndexedTensor inputSequence = createTensorRepresentation(encoding.ids(), "d1");
        IndexedTensor attentionMask = createTensorRepresentation(encoding.attentionMask(), "d1");
        // Token type ids are only passed when a name is configured and the encoding has any.
        IndexedTensor tokenTypeIds = tokenTypeIdsName.isEmpty() ? null : createTensorRepresentation(encoding.typeIds(), "d1");
        Map<String, Tensor> inputs;
        if (tokenTypeIdsName.isEmpty() || tokenTypeIds.isEmpty()) {
            inputs = Map.of(inputIdsName, inputSequence.expand("d0"),
                            attentionMaskName, attentionMask.expand("d0"));
        } else {
            inputs = Map.of(inputIdsName, inputSequence.expand("d0"),
                            attentionMaskName, attentionMask.expand("d0"),
                            tokenTypeIdsName, tokenTypeIds.expand("d0"));
        }
        IndexedTensor tokenEmbeddings = (IndexedTensor)evaluator.evaluate(inputs).get(outputName);
        long[] resultShape = tokenEmbeddings.shape();
        if (resultShape.length != 3) {
            throw new IllegalArgumentException("Expected 3 output dimensions for output name '" + outputName + "': [batch, sequence, embedding], got " + resultShape.length);
        }
        runtime.sampleEmbeddingLatency((double)(System.nanoTime() - start) / 1000000.0, context);
        return new HFEmbeddingResult(tokenEmbeddings, attentionMask, context.getEmbedderId());
    }

    /**
     * Pools the model output to {@code 8 * targetDim} float values, optionally normalizes
     * them, and packs them into {@code targetDim} int8 cells.
     *
     * @throws IllegalArgumentException if the model output's third dimension is smaller than
     *         {@code 8 * targetDim}, or if the target dimension has no size
     */
    private Tensor binaryQuantization(HFEmbeddingResult embeddingResult, TensorType tensorType) {
        long outputDimensions = embeddingResult.output().shape()[2];
        long targetDim = boundSize(tensorType);
        long floatDimensions = 8L * targetDim;
        if (floatDimensions > outputDimensions) {
            throw new IllegalArgumentException("Cannot pack " + outputDimensions + " into " + targetDim + " int8s");
        }
        TensorType poolingType = new TensorType.Builder(TensorType.Value.FLOAT)
                .indexed(tensorType.indexedSubtype().dimensions().get(0).name(), floatDimensions)
                .build();
        Tensor pooled = poolingStrategy.toSentenceEmbedding(poolingType, embeddingResult.output(), embeddingResult.attentionMask());
        if (normalize) {
            pooled = normalize(pooled, poolingType);
        }
        return binarize((IndexedTensor)pooled, tensorType);
    }

    /**
     * Packs the sign of each value of {@code embedding} into bytes, eight values per cell.
     *
     * <p>A value strictly greater than zero becomes a 1 bit, anything else a 0 bit. Within
     * each group of eight values the first value is the most significant bit. Each completed
     * byte is written, as a signed byte value, to the next index of the result. Trailing
     * values that do not fill a complete byte are not written.
     *
     * @param embedding  the values to binarize
     * @param tensorType the type of the tensor to build
     * @return a tensor of {@code tensorType} holding the packed bytes
     */
    public static Tensor binarize(IndexedTensor embedding, TensorType tensorType) {
        Tensor.Builder builder = Tensor.Builder.of(tensorType);
        int size = embedding.sizeAsInt();
        int packedBits = 0;
        int byteIndex = 0;
        for (int d = 0; d < size; d++) {
            if (embedding.get((long)d) > 0.0) {
                packedBits |= 1 << (7 - d % 8);
            }
            if ((d + 1) % 8 == 0) {
                // Narrowing to byte keeps the low eight bits as a signed value (0x80 -> -128).
                builder.cell(TensorAddress.of(byteIndex++), (float)(byte)packedBits);
                packedBits = 0;
            }
        }
        return builder.build();
    }

    /** Copies {@code input} into a float tensor with a single indexed dimension named {@code dimension}. */
    private IndexedTensor createTensorRepresentation(List<Long> input, String dimension) {
        int size = input.size();
        TensorType type = new TensorType.Builder(TensorType.Value.FLOAT).indexed(dimension, size).build();
        IndexedTensor.Builder builder = IndexedTensor.Builder.of(type);
        for (int i = 0; i < size; i++) {
            builder.cell((float)input.get(i).longValue(), i);
        }
        return builder.build();
    }

    /** Raw model output, the attention mask it was computed with, and the id of the embedder that produced it. */
    protected record HFEmbeddingResult(IndexedTensor output, Tensor attentionMask, String embedderId) {
    }

    /** Cache key for model outputs: embedder id plus the embedded value. */
    protected record HFEmbedderCacheKey(String embedderId, Object embeddedValue) {
    }
}

