/*
 * Decompiled with CFR 0.152.
 */
package io.github.jbellis.jvector.vector;

import io.github.jbellis.jvector.util.MathUtil;
import io.github.jbellis.jvector.vector.ArrayByteSequence;
import io.github.jbellis.jvector.vector.ArrayVectorFloat;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.VectorUtilSupport;
import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.ByteSequence;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import java.util.List;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

class PanamaVectorUtilSupport
implements VectorUtilSupport {
    // Width in bits of the platform-preferred float species; the assembleAndSum* dispatchers switch on it.
    static final int PREFERRED_BIT_SIZE = FloatVector.SPECIES_PREFERRED.vectorBitSize();
    // Int vectors with every lane set to 255; AND-ed with bytes widened via B2I so each lane
    // ends up holding the byte's unsigned value (see assembleAndSum512 / assembleAndSum256).
    static final IntVector BYTE_TO_INT_MASK_512 = IntVector.broadcast((VectorSpecies)IntVector.SPECIES_512, (int)255);
    static final IntVector BYTE_TO_INT_MASK_256 = IntVector.broadcast((VectorSpecies)IntVector.SPECIES_256, (int)255);
    // Per-thread int scratch arrays, one slot per int lane of the matching species; they hold the
    // gather indices built in assembleAndSum512 / assembleAndSum256.
    static final ThreadLocal<int[]> scratchInt512 = ThreadLocal.withInitial(() -> new int[IntVector.SPECIES_512.length()]);
    static final ThreadLocal<int[]> scratchInt256 = ThreadLocal.withInitial(() -> new int[IntVector.SPECIES_256.length()]);
    // The constants 1.0f and 0.5f broadcast across the preferred float species.
    static final FloatVector const1f = FloatVector.broadcast((VectorSpecies)FloatVector.SPECIES_PREFERRED, (float)1.0f);
    static final FloatVector const05f = FloatVector.broadcast((VectorSpecies)FloatVector.SPECIES_PREFERRED, (float)0.5f);

    /** Package-private no-argument constructor; performs no initialization work. */
    PanamaVectorUtilSupport() {
    }

    /**
     * Loads one vector's worth of floats from the array backing {@code vector}.
     *
     * @param SPEC   species that determines how many lanes are read
     * @param vector source of the floats; it is cast to {@link ArrayVectorFloat} unconditionally
     * @param offset index in the backing array of the first element to load
     * @return the loaded lanes
     */
    protected FloatVector fromVectorFloat(VectorSpecies<Float> SPEC, VectorFloat<?> vector, int offset) {
        float[] backing = ((ArrayVectorFloat) vector).get();
        return FloatVector.fromArray(SPEC, backing, offset);
    }

    /**
     * Gathers floats from the array backing {@code vector}: lane {@code k} is read from
     * {@code offset + indices[indicesOffset + k]}.
     *
     * @param SPEC          species that determines how many lanes are gathered
     * @param vector        source of the floats; it is cast to {@link ArrayVectorFloat} unconditionally
     * @param offset        base index added to every gather index
     * @param indices       per-lane index map
     * @param indicesOffset position in {@code indices} of the first lane's entry
     * @return the gathered lanes
     */
    protected FloatVector fromVectorFloat(VectorSpecies<Float> SPEC, VectorFloat<?> vector, int offset, int[] indices, int indicesOffset) {
        float[] backing = ((ArrayVectorFloat) vector).get();
        return FloatVector.fromArray(SPEC, backing, offset, indices, indicesOffset);
    }

    /**
     * Stores all lanes of {@code vector} into the array backing {@code v}.
     *
     * @param vector lanes to write
     * @param v      destination; it is cast to {@link ArrayVectorFloat} unconditionally
     * @param offset index in the backing array that receives the first lane
     */
    protected void intoVectorFloat(FloatVector vector, VectorFloat<?> v, int offset) {
        float[] target = ((ArrayVectorFloat) v).get();
        vector.intoArray(target, offset);
    }

    /**
     * Loads one vector's worth of bytes from the array backing {@code vector}.
     *
     * @param SPEC   species that determines how many lanes are read
     * @param vector source of the bytes; it is cast to {@link ArrayByteSequence} unconditionally
     * @param offset index in the backing array of the first byte to load
     * @return the loaded lanes
     */
    protected ByteVector fromByteSequence(VectorSpecies<Byte> SPEC, ByteSequence<?> vector, int offset) {
        byte[] backing = ((ArrayByteSequence) vector).get();
        return ByteVector.fromArray(SPEC, backing, offset);
    }

    /**
     * Stores all lanes of {@code vector} into the array backing {@code v}.
     *
     * @param vector lanes to write
     * @param v      destination; it is cast to {@link ArrayByteSequence} unconditionally
     * @param offset index in the backing array that receives the first lane
     */
    protected void intoByteSequence(ByteVector vector, ByteSequence<?> v, int offset) {
        byte[] target = ((ArrayByteSequence) v).get();
        vector.intoArray(target, offset);
    }

    /**
     * Stores the lanes of {@code vector} selected by {@code mask} into the array backing {@code v}.
     *
     * @param vector lanes to write
     * @param v      destination; it is cast to {@link ArrayByteSequence} unconditionally
     * @param offset index in the backing array that corresponds to the first lane
     * @param mask   selects which lanes are written
     */
    protected void intoByteSequence(ByteVector vector, ByteSequence<?> v, int offset, VectorMask<Byte> mask) {
        byte[] target = ((ArrayByteSequence) v).get();
        vector.intoArray(target, offset, mask);
    }

    /**
     * Adds up every element of {@code vector}.
     *
     * @param vector values to total
     * @return the sum of all elements
     */
    @Override
    public float sum(VectorFloat<?> vector) {
        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(vector.length());

        // Lane-wise accumulation over the part that fills whole vectors.
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        for (int pos = 0; pos < bound; pos += lanes) {
            acc = acc.add(fromVectorFloat(FloatVector.SPECIES_PREFERRED, vector, pos));
        }
        float total = acc.reduceLanes(VectorOperators.ADD);

        // Scalar pass over the leftover elements.
        int pos = bound;
        while (pos < vector.length()) {
            total += vector.get(pos);
            pos++;
        }
        return total;
    }

    /**
     * Computes the element-wise sum of a list of vectors.
     *
     * <p>The result is a newly created vector sized after the first list entry; each input is
     * folded in with {@code addInPlace}.
     *
     * @param vectors vectors to add together
     * @return a new vector holding the element-wise sum
     * @throws IllegalArgumentException if {@code vectors} is null or empty
     */
    @Override
    public VectorFloat<?> sum(List<VectorFloat<?>> vectors) {
        final boolean nothingToSum = vectors == null || vectors.isEmpty();
        if (nothingToSum) {
            throw new IllegalArgumentException("Input list cannot be null or empty");
        }
        VectorFloat<?> first = vectors.get(0);
        VectorFloat<?> total = VectorizationProvider.getInstance()
                .getVectorTypeSupport()
                .createFloatVector(first.length());
        for (VectorFloat<?> term : vectors) {
            addInPlace(total, term);
        }
        return total;
    }

    /**
     * Multiplies every element of {@code vector} by {@code multiplier}, in place.
     *
     * @param vector     vector to modify
     * @param multiplier factor applied to each element
     */
    @Override
    public void scale(VectorFloat<?> vector, float multiplier) {
        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(vector.length());

        int pos = 0;
        // Whole-vector chunks.
        while (pos < bound) {
            FloatVector scaled = fromVectorFloat(FloatVector.SPECIES_PREFERRED, vector, pos).mul(multiplier);
            intoVectorFloat(scaled, vector, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < vector.length(); pos++) {
            vector.set(pos, vector.get(pos) * multiplier);
        }
    }

    /**
     * Dot product of exactly one 64-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the lane-wise products
     */
    float dot64(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_64, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_64, v2, offset2);
        FloatVector products = lhs.mul(rhs);
        return products.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Dot product of exactly one 128-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the lane-wise products
     */
    float dot128(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_128, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_128, v2, offset2);
        FloatVector products = lhs.mul(rhs);
        return products.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Dot product of exactly one 256-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the lane-wise products
     */
    float dot256(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_256, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_256, v2, offset2);
        FloatVector products = lhs.mul(rhs);
        return products.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Dot product of exactly one preferred-species vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the lane-wise products
     */
    float dotPreferred(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, offset2);
        FloatVector products = lhs.mul(rhs);
        return products.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Dot product over the first {@code v1.length()} elements of both vectors.
     *
     * @param v1 first operand; its length determines how many elements are used
     * @param v2 second operand
     * @return the dot product
     */
    @Override
    public float dotProduct(VectorFloat<?> v1, VectorFloat<?> v2) {
        final int count = v1.length();
        return dotProduct(v1, 0, v2, 0, count);
    }

    /**
     * Dot product of two sub-ranges, routed to the kernel that matches {@code length}.
     *
     * <p>Ranges at least as long as the preferred species use the preferred kernel; shorter
     * ranges use the 64-, 128- or 256-bit kernel depending on how many lanes they can fill.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the dot product of the two ranges
     */
    @Override
    public float dotProduct(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        if (length < FloatVector.SPECIES_PREFERRED.length()) {
            if (length < FloatVector.SPECIES_128.length()) {
                return dotProduct64(v1, v1offset, v2, v2offset, length);
            } else if (length < FloatVector.SPECIES_256.length()) {
                return dotProduct128(v1, v1offset, v2, v2offset, length);
            } else {
                return dotProduct256(v1, v1offset, v2, v2offset, length);
            }
        }
        return dotProductPreferred(v1, v1offset, v2, v2offset, length);
    }

    /**
     * Dot product of two sub-ranges using 64-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the dot product of the two ranges
     */
    float dotProduct64(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_64.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return dot64(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_64.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_64);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_64, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_64, v2, v2offset + pos);
            acc = lhs.fma(rhs, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            total += v1.get(v1offset + pos) * v2.get(v2offset + pos);
        }
        return total;
    }

    /**
     * Dot product of two sub-ranges using 128-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the dot product of the two ranges
     */
    float dotProduct128(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_128.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return dot128(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_128.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_128);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_128, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_128, v2, v2offset + pos);
            acc = lhs.fma(rhs, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            total += v1.get(v1offset + pos) * v2.get(v2offset + pos);
        }
        return total;
    }

    /**
     * Dot product of two sub-ranges using 256-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the dot product of the two ranges
     */
    float dotProduct256(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_256.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return dot256(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_256.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_256);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_256, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_256, v2, v2offset + pos);
            acc = lhs.fma(rhs, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            total += v1.get(v1offset + pos) * v2.get(v2offset + pos);
        }
        return total;
    }

    /**
     * Dot product of two sub-ranges using the preferred float species, with the main loop
     * unrolled two vectors at a time over two independent accumulators.
     *
     * <p>{@code length}, {@code vaoffset} and {@code vboffset} are used as running cursors:
     * {@code length} counts the elements still to be consumed and the offsets point at the
     * next unread element.
     *
     * @param va       first operand
     * @param vaoffset start index in {@code va}
     * @param vb       second operand
     * @param vboffset start index in {@code vb}
     * @param length   number of elements to process
     * @return the dot product of the two ranges
     */
    float dotProductPreferred(VectorFloat<?> va, int vaoffset, VectorFloat<?> vb, int vboffset, int length) {
        FloatVector sum0;
        // Exactly one vector's worth: a single multiply-and-reduce is enough.
        if (length == FloatVector.SPECIES_PREFERRED.length()) {
            return this.dotPreferred(va, vaoffset, vb, vboffset);
        }
        // Two accumulators, both starting at zero.
        FloatVector sum1 = sum0 = FloatVector.zero((VectorSpecies)FloatVector.SPECIES_PREFERRED);
        int vectorLength = FloatVector.SPECIES_PREFERRED.length();
        if (length >= vectorLength * 2) {
            // Prime the loop: load the first pair of vectors from each operand.
            length -= vectorLength * 2;
            FloatVector a0 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, va, vaoffset + vectorLength * 0);
            FloatVector b0 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, vb, vboffset + vectorLength * 0);
            FloatVector a1 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, va, vaoffset + vectorLength * 1);
            FloatVector b1 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, vb, vboffset + vectorLength * 1);
            vaoffset += vectorLength * 2;
            vboffset += vectorLength * 2;
            // Each iteration accumulates the pair loaded previously, then loads the next pair.
            while (length >= vectorLength * 2) {
                length -= vectorLength * 2;
                sum0 = a0.fma((Vector)b0, (Vector)sum0);
                a0 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, va, vaoffset + vectorLength * 0);
                b0 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, vb, vboffset + vectorLength * 0);
                sum1 = a1.fma((Vector)b1, (Vector)sum1);
                a1 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, va, vaoffset + vectorLength * 1);
                b1 = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, vb, vboffset + vectorLength * 1);
                vaoffset += vectorLength * 2;
                vboffset += vectorLength * 2;
            }
            // Accumulate the last pair that was loaded but not yet consumed.
            sum0 = a0.fma((Vector)b0, (Vector)sum0);
            sum1 = a1.fma((Vector)b1, (Vector)sum1);
        }
        // Merge the two accumulators into one.
        sum0 = sum0.add((Vector)sum1);
        // Consume any remaining whole vector that did not form a pair.
        while (length >= vectorLength) {
            length -= vectorLength;
            FloatVector a = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, va, vaoffset);
            FloatVector b = this.fromVectorFloat((VectorSpecies<Float>)FloatVector.SPECIES_PREFERRED, vb, vboffset);
            vaoffset += vectorLength;
            vboffset += vectorLength;
            sum0 = a.fma((Vector)b, (Vector)sum0);
        }
        float resVec = sum0.reduceLanes(VectorOperators.ADD);
        // Scalar tail: elements that do not fill a whole vector.
        float resTail = 0.0f;
        while (length > 0) {
            resTail += va.get(vaoffset++) * vb.get(vboffset++);
            --length;
        }
        return resVec + resTail;
    }

    /**
     * Cosine similarity of two equal-length vectors:
     * {@code dot(v1, v2) / sqrt(|v1|^2 * |v2|^2)}.
     *
     * <p>The dot product and both squared magnitudes are accumulated in one pass. If either
     * squared magnitude is zero the division yields NaN.
     *
     * @param v1 first operand
     * @param v2 second operand
     * @return the cosine of the angle between the vectors
     * @throws IllegalArgumentException if the vectors differ in length
     */
    @Override
    public float cosine(VectorFloat<?> v1, VectorFloat<?> v2) {
        if (v1.length() != v2.length()) {
            throw new IllegalArgumentException("Vectors must have the same length");
        }
        FloatVector vsum = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        FloatVector vaMagnitude = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        FloatVector vbMagnitude = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        int vectorizedLength = FloatVector.SPECIES_PREFERRED.loopBound(v1.length());
        for (int i = 0; i < vectorizedLength; i += FloatVector.SPECIES_PREFERRED.length()) {
            FloatVector a = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, i);
            FloatVector b = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, i);
            vsum = a.fma(b, vsum);
            vaMagnitude = a.fma(a, vaMagnitude);
            vbMagnitude = b.fma(b, vbMagnitude);
        }
        float sum = vsum.reduceLanes(VectorOperators.ADD);
        float aMagnitude = vaMagnitude.reduceLanes(VectorOperators.ADD);
        float bMagnitude = vbMagnitude.reduceLanes(VectorOperators.ADD);
        // Scalar tail for elements that do not fill a whole vector.
        for (int i = vectorizedLength; i < v1.length(); ++i) {
            sum += v1.get(i) * v2.get(i);
            aMagnitude += v1.get(i) * v1.get(i);
            bMagnitude += v2.get(i) * v2.get(i);
        }
        // Multiply the squared magnitudes as doubles: a float product can overflow to
        // Infinity (or underflow to 0) even when both factors are finite, which would
        // collapse the result to 0 or NaN.
        return (float) ((double) sum / Math.sqrt((double) aMagnitude * (double) bMagnitude));
    }

    /**
     * Cosine similarity of two sub-ranges:
     * {@code dot(a, b) / sqrt(|a|^2 * |b|^2)} over {@code length} elements.
     *
     * <p>The dot product and both squared magnitudes are accumulated in one pass. If either
     * squared magnitude is zero the division yields NaN.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the cosine of the angle between the two ranges
     */
    @Override
    public float cosine(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        FloatVector vsum = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        FloatVector vaMagnitude = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        FloatVector vbMagnitude = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        int vectorizedLength = FloatVector.SPECIES_PREFERRED.loopBound(length);
        for (int i = 0; i < vectorizedLength; i += FloatVector.SPECIES_PREFERRED.length()) {
            FloatVector a = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, v1offset + i);
            FloatVector b = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, v2offset + i);
            vsum = a.fma(b, vsum);
            vaMagnitude = a.fma(a, vaMagnitude);
            vbMagnitude = b.fma(b, vbMagnitude);
        }
        float sum = vsum.reduceLanes(VectorOperators.ADD);
        float aMagnitude = vaMagnitude.reduceLanes(VectorOperators.ADD);
        float bMagnitude = vbMagnitude.reduceLanes(VectorOperators.ADD);
        // Scalar tail for elements that do not fill a whole vector.
        for (int i = vectorizedLength; i < length; ++i) {
            sum += v1.get(v1offset + i) * v2.get(v2offset + i);
            aMagnitude += v1.get(v1offset + i) * v1.get(v1offset + i);
            bMagnitude += v2.get(v2offset + i) * v2.get(v2offset + i);
        }
        // Multiply the squared magnitudes as doubles: a float product can overflow to
        // Infinity (or underflow to 0) even when both factors are finite, which would
        // collapse the result to 0 or NaN.
        return (float) ((double) sum / Math.sqrt((double) aMagnitude * (double) bMagnitude));
    }

    /**
     * Squared Euclidean distance over exactly one 64-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the squared lane-wise differences
     */
    float squareDistance64(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_64, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_64, v2, offset2);
        FloatVector delta = lhs.sub(rhs);
        FloatVector squared = delta.mul(delta);
        return squared.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Squared Euclidean distance over exactly one 128-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the squared lane-wise differences
     */
    float squareDistance128(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_128, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_128, v2, offset2);
        FloatVector delta = lhs.sub(rhs);
        FloatVector squared = delta.mul(delta);
        return squared.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Squared Euclidean distance over exactly one 256-bit float vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the squared lane-wise differences
     */
    float squareDistance256(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_256, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_256, v2, offset2);
        FloatVector delta = lhs.sub(rhs);
        FloatVector squared = delta.mul(delta);
        return squared.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Squared Euclidean distance over exactly one preferred-species vector's worth of elements.
     *
     * @param v1      first operand
     * @param offset1 start index in {@code v1}
     * @param v2      second operand
     * @param offset2 start index in {@code v2}
     * @return the sum of the squared lane-wise differences
     */
    float squareDistancePreferred(VectorFloat<?> v1, int offset1, VectorFloat<?> v2, int offset2) {
        FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, offset1);
        FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, offset2);
        FloatVector delta = lhs.sub(rhs);
        FloatVector squared = delta.mul(delta);
        return squared.reduceLanes(VectorOperators.ADD);
    }

    /**
     * Squared Euclidean distance over the first {@code v1.length()} elements of both vectors.
     *
     * @param v1 first operand; its length determines how many elements are used
     * @param v2 second operand
     * @return the squared distance
     */
    @Override
    public float squareDistance(VectorFloat<?> v1, VectorFloat<?> v2) {
        final int count = v1.length();
        return squareDistance(v1, 0, v2, 0, count);
    }

    /**
     * Squared Euclidean distance of two sub-ranges, routed to the kernel that matches
     * {@code length}.
     *
     * <p>Ranges at least as long as the preferred species use the preferred kernel; shorter
     * ranges use the 64-, 128- or 256-bit kernel depending on how many lanes they can fill.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the squared distance between the two ranges
     */
    @Override
    public float squareDistance(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        if (length < FloatVector.SPECIES_PREFERRED.length()) {
            if (length < FloatVector.SPECIES_128.length()) {
                return squareDistance64(v1, v1offset, v2, v2offset, length);
            } else if (length < FloatVector.SPECIES_256.length()) {
                return squareDistance128(v1, v1offset, v2, v2offset, length);
            } else {
                return squareDistance256(v1, v1offset, v2, v2offset, length);
            }
        }
        return squareDistancePreferred(v1, v1offset, v2, v2offset, length);
    }

    /**
     * Squared Euclidean distance of two sub-ranges using 64-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the squared distance between the two ranges
     */
    float squareDistance64(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_64.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return squareDistance64(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_64.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_64);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_64, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_64, v2, v2offset + pos);
            FloatVector delta = lhs.sub(rhs);
            acc = delta.fma(delta, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            float delta = v1.get(v1offset + pos) - v2.get(v2offset + pos);
            total += delta * delta;
        }
        return total;
    }

    /**
     * Squared Euclidean distance of two sub-ranges using 128-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the squared distance between the two ranges
     */
    float squareDistance128(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_128.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return squareDistance128(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_128.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_128);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_128, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_128, v2, v2offset + pos);
            FloatVector delta = lhs.sub(rhs);
            acc = delta.fma(delta, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            float delta = v1.get(v1offset + pos) - v2.get(v2offset + pos);
            total += delta * delta;
        }
        return total;
    }

    /**
     * Squared Euclidean distance of two sub-ranges using 256-bit float vectors plus a scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the squared distance between the two ranges
     */
    float squareDistance256(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_256.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return squareDistance256(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_256.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_256);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_256, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_256, v2, v2offset + pos);
            FloatVector delta = lhs.sub(rhs);
            acc = delta.fma(delta, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            float delta = v1.get(v1offset + pos) - v2.get(v2offset + pos);
            total += delta * delta;
        }
        return total;
    }

    /**
     * Squared Euclidean distance of two sub-ranges using the preferred float species plus a
     * scalar tail.
     *
     * @param v1       first operand
     * @param v1offset start index in {@code v1}
     * @param v2       second operand
     * @param v2offset start index in {@code v2}
     * @param length   number of elements to process
     * @return the squared distance between the two ranges
     */
    float squareDistancePreferred(VectorFloat<?> v1, int v1offset, VectorFloat<?> v2, int v2offset, int length) {
        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        // A range of exactly one vector needs no loop or accumulator.
        if (length == lanes) {
            return squareDistancePreferred(v1, v1offset, v2, v2offset);
        }

        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(length);
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_PREFERRED);
        int pos = 0;
        while (pos < bound) {
            FloatVector lhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, v1offset + pos);
            FloatVector rhs = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, v2offset + pos);
            FloatVector delta = lhs.sub(rhs);
            acc = delta.fma(delta, acc);
            pos += lanes;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for whatever did not fill a whole vector.
        for (; pos < length; pos++) {
            float delta = v1.get(v1offset + pos) - v2.get(v2offset + pos);
            total += delta * delta;
        }
        return total;
    }

    /**
     * Adds the first 64-bit vector's worth of {@code v2} to the same positions of {@code v1},
     * writing the result back into {@code v1}.
     *
     * @param v1 vector that is updated in place
     * @param v2 vector whose values are added
     */
    void addInPlace64(VectorFloat<?> v1, VectorFloat<?> v2) {
        FloatVector target = fromVectorFloat(FloatVector.SPECIES_64, v1, 0);
        FloatVector addend = fromVectorFloat(FloatVector.SPECIES_64, v2, 0);
        FloatVector result = target.add(addend);
        intoVectorFloat(result, v1, 0);
    }

    /**
     * Adds {@code value} to the first 64-bit vector's worth of {@code v1}, in place.
     *
     * @param v1    vector that is updated in place
     * @param value scalar added to each loaded lane
     */
    void addInPlace64(VectorFloat<?> v1, float value) {
        FloatVector target = fromVectorFloat(FloatVector.SPECIES_64, v1, 0);
        FloatVector result = target.add(value);
        intoVectorFloat(result, v1, 0);
    }

    /**
     * Element-wise {@code v1 += v2}.
     *
     * <p>Two-element vectors are handled with a single 64-bit vector operation; everything
     * else uses the preferred species plus a scalar tail.
     *
     * @param v1 vector that is updated in place
     * @param v2 vector whose values are added
     * @throws IllegalArgumentException if the vectors differ in length
     */
    @Override
    public void addInPlace(VectorFloat<?> v1, VectorFloat<?> v2) {
        if (v1.length() != v2.length()) {
            throw new IllegalArgumentException("Vectors must have the same length");
        }
        if (v1.length() == 2) {
            addInPlace64(v1, v2);
            return;
        }

        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(v1.length());
        int pos = 0;
        while (pos < bound) {
            FloatVector target = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, pos);
            FloatVector addend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, pos);
            intoVectorFloat(target.add(addend), v1, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < v1.length(); pos++) {
            v1.set(pos, v1.get(pos) + v2.get(pos));
        }
    }

    /**
     * Adds {@code value} to every element of {@code v1}, in place.
     *
     * <p>Two-element vectors are handled with a single 64-bit vector operation; everything
     * else uses the preferred species plus a scalar tail.
     *
     * @param v1    vector that is updated in place
     * @param value scalar added to each element
     */
    @Override
    public void addInPlace(VectorFloat<?> v1, float value) {
        if (v1.length() == 2) {
            addInPlace64(v1, value);
            return;
        }

        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(v1.length());
        int pos = 0;
        while (pos < bound) {
            FloatVector target = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, pos);
            intoVectorFloat(target.add(value), v1, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < v1.length(); pos++) {
            v1.set(pos, v1.get(pos) + value);
        }
    }

    /**
     * Element-wise {@code v1 -= v2}.
     *
     * @param v1 vector that is updated in place
     * @param v2 vector whose values are subtracted
     * @throws IllegalArgumentException if the vectors differ in length
     */
    @Override
    public void subInPlace(VectorFloat<?> v1, VectorFloat<?> v2) {
        if (v1.length() != v2.length()) {
            throw new IllegalArgumentException("Vectors must have the same length");
        }

        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(v1.length());
        int pos = 0;
        while (pos < bound) {
            FloatVector minuend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, pos);
            FloatVector subtrahend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, pos);
            intoVectorFloat(minuend.sub(subtrahend), v1, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < v1.length(); pos++) {
            v1.set(pos, v1.get(pos) - v2.get(pos));
        }
    }

    /**
     * Subtracts {@code value} from every element of {@code vector}, in place.
     *
     * @param vector vector that is updated in place
     * @param value  scalar subtracted from each element
     */
    @Override
    public void subInPlace(VectorFloat<?> vector, float value) {
        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(vector.length());
        int pos = 0;
        while (pos < bound) {
            FloatVector minuend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, vector, pos);
            intoVectorFloat(minuend.sub(value), vector, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < vector.length(); pos++) {
            vector.set(pos, vector.get(pos) - value);
        }
    }

    /**
     * Returns a new vector holding {@code a[i] - value} for every element of {@code a}.
     *
     * @param a     source vector; left unmodified
     * @param value scalar subtracted from each element
     * @return a new vector with the differences
     */
    @Override
    public VectorFloat<?> sub(VectorFloat<?> a, float value) {
        final int count = a.length();
        return sub(a, 0, value, count);
    }

    /**
     * Returns a new vector holding {@code a[i] - b[i]} for the first {@code a.length()} elements.
     *
     * @param a minuend; its length determines how many elements are used
     * @param b subtrahend
     * @return a new vector with the differences
     */
    @Override
    public VectorFloat<?> sub(VectorFloat<?> a, VectorFloat<?> b) {
        final int count = a.length();
        return sub(a, 0, b, 0, count);
    }

    /**
     * Returns a new vector of {@code length} elements where element {@code i} is
     * {@code a[aOffset + i] - b[bOffset + i]}.
     *
     * @param a       minuend
     * @param aOffset start index in {@code a}
     * @param b       subtrahend
     * @param bOffset start index in {@code b}
     * @param length  number of elements to process
     * @return a new vector with the differences, indexed from 0
     */
    @Override
    public VectorFloat<?> sub(VectorFloat<?> a, int aOffset, VectorFloat<?> b, int bOffset, int length) {
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(length);
        final VectorFloat<?> out = VectorizationProvider.getInstance()
                .getVectorTypeSupport()
                .createFloatVector(length);
        final int lanes = FloatVector.SPECIES_PREFERRED.length();

        int pos = 0;
        while (pos < bound) {
            FloatVector minuend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, a, aOffset + pos);
            FloatVector subtrahend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, b, bOffset + pos);
            intoVectorFloat(minuend.sub(subtrahend), out, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < length; pos++) {
            out.set(pos, a.get(aOffset + pos) - b.get(bOffset + pos));
        }
        return out;
    }

    /**
     * Returns a new vector of {@code length} elements where element {@code i} is
     * {@code a[aOffset + i] - value}.
     *
     * @param a       source vector; left unmodified
     * @param aOffset start index in {@code a}
     * @param value   scalar subtracted from each element
     * @param length  number of elements to process
     * @return a new vector with the differences, indexed from 0
     */
    public VectorFloat<?> sub(VectorFloat<?> a, int aOffset, float value, int length) {
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(length);
        final VectorFloat<?> out = VectorizationProvider.getInstance()
                .getVectorTypeSupport()
                .createFloatVector(length);
        final int lanes = FloatVector.SPECIES_PREFERRED.length();

        int pos = 0;
        while (pos < bound) {
            FloatVector minuend = fromVectorFloat(FloatVector.SPECIES_PREFERRED, a, aOffset + pos);
            intoVectorFloat(minuend.sub(value), out, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < length; pos++) {
            out.set(pos, a.get(aOffset + pos) - value);
        }
        return out;
    }

    /**
     * Replaces each element of {@code v1} with the smaller of it and the matching element
     * of {@code v2}.
     *
     * @param v1 vector that is updated in place
     * @param v2 vector to compare against
     * @throws IllegalArgumentException if the vectors differ in length
     */
    @Override
    public void minInPlace(VectorFloat<?> v1, VectorFloat<?> v2) {
        if (v1.length() != v2.length()) {
            throw new IllegalArgumentException("Vectors must have the same length");
        }

        final int lanes = FloatVector.SPECIES_PREFERRED.length();
        final int bound = FloatVector.SPECIES_PREFERRED.loopBound(v1.length());
        int pos = 0;
        while (pos < bound) {
            FloatVector current = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v1, pos);
            FloatVector candidate = fromVectorFloat(FloatVector.SPECIES_PREFERRED, v2, pos);
            intoVectorFloat(current.min(candidate), v1, pos);
            pos += lanes;
        }
        // Leftover elements, one at a time.
        for (; pos < v1.length(); pos++) {
            v1.set(pos, Math.min(v1.get(pos), v2.get(pos)));
        }
    }

    /**
     * Convenience overload that processes every entry of {@code baseOffsets}, starting at 0.
     *
     * @param data        table of floats to pick values from
     * @param dataBase    stride in {@code data} between consecutive entries' rows
     * @param baseOffsets per-entry byte offsets, read as unsigned values
     * @return the sum of the selected floats
     */
    @Override
    public float assembleAndSum(VectorFloat<?> data, int dataBase, ByteSequence<?> baseOffsets) {
        final int count = baseOffsets.length();
        return assembleAndSum(data, dataBase, baseOffsets, 0, count);
    }

    /**
     * Sums {@code data[dataBase * i + unsigned(baseOffsets[baseOffsetsOffset + i])]} for
     * {@code i} in {@code [0, baseOffsetsLength)}, using the kernel that matches
     * {@link #PREFERRED_BIT_SIZE}.
     *
     * @param data              table of floats to pick values from
     * @param dataBase          stride in {@code data} between consecutive entries' rows
     * @param baseOffsets       per-entry byte offsets, read as unsigned values
     * @param baseOffsetsOffset index of the first entry of {@code baseOffsets} to use
     * @param baseOffsetsLength number of entries to process
     * @return the sum of the selected floats
     * @throws IllegalStateException if the preferred vector width is not 512, 256 or 128 bits
     */
    @Override
    public float assembleAndSum(VectorFloat<?> data, int dataBase, ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength) {
        switch (PREFERRED_BIT_SIZE) {
            case 512:
                return assembleAndSum512(data, dataBase, baseOffsets, baseOffsetsOffset, baseOffsetsLength);
            case 256:
                return assembleAndSum256(data, dataBase, baseOffsets, baseOffsetsOffset, baseOffsetsLength);
            case 128:
                return assembleAndSum128(data, dataBase, baseOffsets, baseOffsetsOffset, baseOffsetsLength);
            default:
                throw new IllegalStateException("Unsupported vector width: " + PREFERRED_BIT_SIZE);
        }
    }

    /**
     * 512-bit kernel for {@code assembleAndSum}: sums
     * {@code data[dataBase * i + unsigned(baseOffsets[baseOffsetsOffset + i])]} for
     * {@code i} in {@code [0, baseOffsetsLength)}.
     *
     * <p>Each step loads one 128-bit vector of byte offsets, widens it to ints, masks each
     * lane to its unsigned value, adds {@code lane * dataBase}, and uses the result as gather
     * indices into {@code data}. Entries that do not fill a whole byte vector are handled by
     * a scalar tail.
     *
     * @param data              table of floats to pick values from
     * @param dataBase          stride in {@code data} between consecutive entries' rows
     * @param baseOffsets       per-entry byte offsets, read as unsigned values
     * @param baseOffsetsOffset index of the first entry of {@code baseOffsets} to use
     * @param baseOffsetsLength number of entries to process
     * @return the sum of the selected floats
     */
    float assembleAndSum512(VectorFloat<?> data, int dataBase, ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength) {
        final int[] gatherIndices = scratchInt512.get();
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_512);
        final int bound = ByteVector.SPECIES_128.loopBound(baseOffsetsLength);
        // Lane k holds k * dataBase: the row start of the k-th entry within one step.
        final IntVector laneStride = IntVector.zero(IntVector.SPECIES_512).addIndex(dataBase);
        final int step = ByteVector.SPECIES_128.length();

        int pos = 0;
        while (pos < bound) {
            // The raw load addresses the backing array, so the sequence's own offset is added here.
            ByteVector rawOffsets = fromByteSequence(ByteVector.SPECIES_128, baseOffsets, pos + baseOffsets.offset() + baseOffsetsOffset);
            rawOffsets.convertShape(VectorOperators.B2I, IntVector.SPECIES_512, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_512)
                    .reinterpretAsInts()
                    .add(laneStride)
                    .intoArray(gatherIndices, 0);
            int rowBase = pos * dataBase;
            acc = acc.add(fromVectorFloat(FloatVector.SPECIES_512, data, rowBase, gatherIndices, 0));
            pos += step;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for the entries left over after the vector loop.
        for (; pos < baseOffsetsLength; pos++) {
            total += data.get(dataBase * pos + Byte.toUnsignedInt(baseOffsets.get(pos + baseOffsetsOffset)));
        }
        return total;
    }

    /**
     * 256-bit kernel for {@code assembleAndSum}: sums
     * {@code data[dataBase * i + unsigned(baseOffsets[baseOffsetsOffset + i])]} for
     * {@code i} in {@code [0, baseOffsetsLength)}.
     *
     * <p>Each step loads one 64-bit vector of byte offsets, widens it to ints, masks each
     * lane to its unsigned value, adds {@code lane * dataBase}, and uses the result as gather
     * indices into {@code data}. Entries that do not fill a whole byte vector are handled by
     * a scalar tail.
     *
     * @param data              table of floats to pick values from
     * @param dataBase          stride in {@code data} between consecutive entries' rows
     * @param baseOffsets       per-entry byte offsets, read as unsigned values
     * @param baseOffsetsOffset index of the first entry of {@code baseOffsets} to use
     * @param baseOffsetsLength number of entries to process
     * @return the sum of the selected floats
     */
    float assembleAndSum256(VectorFloat<?> data, int dataBase, ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength) {
        final int[] gatherIndices = scratchInt256.get();
        FloatVector acc = FloatVector.zero(FloatVector.SPECIES_256);
        final int bound = ByteVector.SPECIES_64.loopBound(baseOffsetsLength);
        // Lane k holds k * dataBase: the row start of the k-th entry within one step.
        final IntVector laneStride = IntVector.zero(IntVector.SPECIES_256).addIndex(dataBase);
        final int step = ByteVector.SPECIES_64.length();

        int pos = 0;
        while (pos < bound) {
            // The raw load addresses the backing array, so the sequence's own offset is added here.
            ByteVector rawOffsets = fromByteSequence(ByteVector.SPECIES_64, baseOffsets, pos + baseOffsets.offset() + baseOffsetsOffset);
            rawOffsets.convertShape(VectorOperators.B2I, IntVector.SPECIES_256, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_256)
                    .reinterpretAsInts()
                    .add(laneStride)
                    .intoArray(gatherIndices, 0);
            int rowBase = pos * dataBase;
            acc = acc.add(fromVectorFloat(FloatVector.SPECIES_256, data, rowBase, gatherIndices, 0));
            pos += step;
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // Scalar tail for the entries left over after the vector loop.
        for (; pos < baseOffsetsLength; pos++) {
            total += data.get(dataBase * pos + Byte.toUnsignedInt(baseOffsets.get(pos + baseOffsetsOffset)));
        }
        return total;
    }

    /**
     * Scalar fallback of the assemble-and-sum kernel used for 128-bit preferred vectors.
     * <p>
     * Adds {@code data[dataBase * p + u(p)]} for every position {@code p} in
     * {@code [0, baseOffsetsLength)}, where {@code u(p)} is the unsigned value of the byte at
     * {@code p + baseOffsetsOffset}.
     *
     * @param data              table the summands are read from
     * @param dataBase          stride (in elements) between consecutive positions' sub-tables
     * @param baseOffsets       byte codes selecting one element per position
     * @param baseOffsetsOffset index of the first code to use
     * @param baseOffsetsLength number of codes (positions) to process
     * @return the accumulated sum
     */
    float assembleAndSum128(VectorFloat<?> data, int dataBase, ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength) {
        float total = 0.0f;
        int pos = 0;
        while (pos < baseOffsetsLength) {
            int code = Byte.toUnsignedInt(baseOffsets.get(pos + baseOffsetsOffset));
            total += data.get(dataBase * pos + code);
            ++pos;
        }
        return total;
    }

    /**
     * Dispatches to the {@code assembleAndSumPQ_*} implementation matching the preferred vector width.
     *
     * @throws IllegalStateException if {@code PREFERRED_BIT_SIZE} is not 512, 256 or 128
     */
    @Override
    public float assembleAndSumPQ(VectorFloat<?> codebookPartialSums, int subspaceCount, ByteSequence<?> vector1Ordinals, int vector1OrdinalOffset, ByteSequence<?> vector2Ordinals, int vector2OrdinalOffset, int clusterCount) {
        switch (PREFERRED_BIT_SIZE) {
            case 512:
                return this.assembleAndSumPQ_512(codebookPartialSums, subspaceCount, vector1Ordinals, vector1OrdinalOffset, vector2Ordinals, vector2OrdinalOffset, clusterCount);
            case 256:
                return this.assembleAndSumPQ_256(codebookPartialSums, subspaceCount, vector1Ordinals, vector1OrdinalOffset, vector2Ordinals, vector2OrdinalOffset, clusterCount);
            case 128:
                return this.assembleAndSumPQ_128(codebookPartialSums, subspaceCount, vector1Ordinals, vector1OrdinalOffset, vector2Ordinals, vector2OrdinalOffset, clusterCount);
            default:
                throw new IllegalStateException("Unsupported vector width: " + PREFERRED_BIT_SIZE);
        }
    }

    /**
     * Scalar implementation of the pairwise PQ sum.
     * <p>
     * {@code data} is read as {@code subspaceCount} consecutive blocks of
     * {@code clusterCount * (clusterCount + 1) / 2} floats. Within a block the entry for an unordered
     * code pair is addressed as a packed triangle: with {@code r = min(c1, c2)} and
     * {@code c = max(c1, c2)}, the index is {@code r * k - r * (r - 1) / 2 + (c - r)}.
     *
     * @param data               per-subspace packed tables
     * @param subspaceCount      number of subspaces (codes per vector) to process
     * @param baseOffsets1       codes of the first vector
     * @param baseOffsetsOffset1 index of the first code of the first vector
     * @param baseOffsets2       codes of the second vector
     * @param baseOffsetsOffset2 index of the first code of the second vector
     * @param clusterCount       number of clusters per subspace ({@code k})
     * @return the sum of the selected table entries
     */
    float assembleAndSumPQ_128(VectorFloat<?> data, int subspaceCount, ByteSequence<?> baseOffsets1, int baseOffsetsOffset1, ByteSequence<?> baseOffsets2, int baseOffsetsOffset2, int clusterCount) {
        final int blockSize = clusterCount * (clusterCount + 1) / 2;
        float total = 0.0f;
        int subspace = 0;
        while (subspace < subspaceCount) {
            int first = Byte.toUnsignedInt(baseOffsets1.get(subspace + baseOffsetsOffset1));
            int second = Byte.toUnsignedInt(baseOffsets2.get(subspace + baseOffsetsOffset2));
            int row = Math.min(first, second);
            int col = Math.max(first, second);
            // Start of the row inside the packed triangle, then the column offset from the diagonal.
            int rowStart = row * clusterCount - row * (row - 1) / 2;
            int indexInBlock = rowStart + (col - row);
            total += data.get(subspace * blockSize + indexInBlock);
            ++subspace;
        }
        return total;
    }

    /**
     * 256-bit implementation of the pairwise PQ sum.
     * <p>
     * {@code data} is read as {@code subspaceCount} consecutive blocks of
     * {@code clusterCount * (clusterCount + 1) / 2} floats, each addressed as a packed triangle: with
     * {@code r = min(c1, c2)} and {@code c = max(c1, c2)} the index inside a block is
     * {@code r * k - r * (r - 1) / 2 + (c - r)}. The vector loop evaluates that formula in float lanes
     * and converts the result to int indexes; the remainder is handled with integer arithmetic.
     *
     * @param data               per-subspace packed tables
     * @param subspaceCount      number of subspaces (codes per vector) to process
     * @param baseOffsets1       codes of the first vector
     * @param baseOffsetsOffset1 index of the first code of the first vector
     * @param baseOffsets2       codes of the second vector
     * @param baseOffsetsOffset2 index of the first code of the second vector
     * @param clusterCount       number of clusters per subspace ({@code k})
     * @return the sum of the selected table entries
     */
    float assembleAndSumPQ_256(VectorFloat<?> data, int subspaceCount, ByteSequence<?> baseOffsets1, int baseOffsetsOffset1, ByteSequence<?> baseOffsets2, int baseOffsetsOffset2, int clusterCount) {
        final VectorSpecies<Float> species = FloatVector.SPECIES_256;
        final int lanes = species.length();
        final int blockSize = clusterCount * (clusterCount + 1) / 2;
        final int[] gatherIndexes = scratchInt256.get();
        // Lane j holds j * blockSize: the start of the j-th block relative to the current chunk.
        final FloatVector laneBlockStarts = FloatVector.zero(species).addIndex(blockSize);
        final FloatVector clusterCountVec = FloatVector.broadcast(species, (long) clusterCount);
        final FloatVector one = FloatVector.broadcast(species, 1L);
        final FloatVector half = FloatVector.broadcast(species, 0.5f);
        FloatVector acc = FloatVector.zero(species);

        int pos = 0;
        for (; pos + lanes <= subspaceCount; pos += lanes) {
            FloatVector codes1 = this.fromByteSequence(ByteVector.SPECIES_64, baseOffsets1, pos + baseOffsets1.offset() + baseOffsetsOffset1)
                    .convertShape(VectorOperators.B2I, IntVector.SPECIES_256, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_256)
                    .convertShape(VectorOperators.I2F, species, 0)
                    .reinterpretAsFloats();
            FloatVector codes2 = this.fromByteSequence(ByteVector.SPECIES_64, baseOffsets2, pos + baseOffsets2.offset() + baseOffsetsOffset2)
                    .convertShape(VectorOperators.B2I, IntVector.SPECIES_256, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_256)
                    .convertShape(VectorOperators.I2F, species, 0)
                    .reinterpretAsFloats();
            FloatVector row = codes1.min(codes2);
            FloatVector col = codes1.max(codes2);
            // rowStart = r * k - r * (r - 1) / 2, computed lane-wise in floats.
            FloatVector rowTimesK = row.mul(clusterCountVec);
            FloatVector skipped = row.mul(row.sub(one)).mul(half);
            FloatVector rowStart = rowTimesK.sub(skipped);
            rowStart.add(col.sub(row))
                    .add(laneBlockStarts)
                    .convertShape(VectorOperators.F2I, IntVector.SPECIES_256, 0)
                    .reinterpretAsInts()
                    .intoArray(gatherIndexes, 0);
            FloatVector gathered = this.fromVectorFloat(species, data, pos * blockSize, gatherIndexes, 0);
            acc = acc.add(gathered);
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // pos now equals the largest multiple of the lane count not exceeding subspaceCount.
        for (; pos < subspaceCount; ++pos) {
            int first = Byte.toUnsignedInt(baseOffsets1.get(pos + baseOffsetsOffset1));
            int second = Byte.toUnsignedInt(baseOffsets2.get(pos + baseOffsetsOffset2));
            int r = Math.min(first, second);
            int c = Math.max(first, second);
            int indexInBlock = r * clusterCount - r * (r - 1) / 2 + (c - r);
            total += data.get(pos * blockSize + indexInBlock);
        }
        return total;
    }

    /**
     * 512-bit implementation of the pairwise PQ sum.
     * <p>
     * {@code data} is read as {@code subspaceCount} consecutive blocks of
     * {@code clusterCount * (clusterCount + 1) / 2} floats, each addressed as a packed triangle: with
     * {@code r = min(c1, c2)} and {@code c = max(c1, c2)} the index inside a block is
     * {@code r * k - r * (r - 1) / 2 + (c - r)}. The vector loop evaluates that formula in float lanes
     * and converts the result to int indexes; the remainder is handled with integer arithmetic.
     *
     * @param data               per-subspace packed tables
     * @param subspaceCount      number of subspaces (codes per vector) to process
     * @param baseOffsets1       codes of the first vector
     * @param baseOffsetsOffset1 index of the first code of the first vector
     * @param baseOffsets2       codes of the second vector
     * @param baseOffsetsOffset2 index of the first code of the second vector
     * @param clusterCount       number of clusters per subspace ({@code k})
     * @return the sum of the selected table entries
     */
    float assembleAndSumPQ_512(VectorFloat<?> data, int subspaceCount, ByteSequence<?> baseOffsets1, int baseOffsetsOffset1, ByteSequence<?> baseOffsets2, int baseOffsetsOffset2, int clusterCount) {
        final VectorSpecies<Float> species = FloatVector.SPECIES_512;
        final int lanes = species.length();
        final int blockSize = clusterCount * (clusterCount + 1) / 2;
        final int[] gatherIndexes = scratchInt512.get();
        // Lane j holds j * blockSize: the start of the j-th block relative to the current chunk.
        final FloatVector laneBlockStarts = FloatVector.zero(species).addIndex(blockSize);
        final FloatVector clusterCountVec = FloatVector.broadcast(species, (long) clusterCount);
        final FloatVector one = FloatVector.broadcast(species, 1L);
        final FloatVector half = FloatVector.broadcast(species, 0.5f);
        FloatVector acc = FloatVector.zero(species);

        int pos = 0;
        for (; pos + lanes <= subspaceCount; pos += lanes) {
            FloatVector codes1 = this.fromByteSequence(ByteVector.SPECIES_128, baseOffsets1, pos + baseOffsets1.offset() + baseOffsetsOffset1)
                    .convertShape(VectorOperators.B2I, IntVector.SPECIES_512, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_512)
                    .convertShape(VectorOperators.I2F, species, 0)
                    .reinterpretAsFloats();
            FloatVector codes2 = this.fromByteSequence(ByteVector.SPECIES_128, baseOffsets2, pos + baseOffsets2.offset() + baseOffsetsOffset2)
                    .convertShape(VectorOperators.B2I, IntVector.SPECIES_512, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_512)
                    .convertShape(VectorOperators.I2F, species, 0)
                    .reinterpretAsFloats();
            FloatVector row = codes1.min(codes2);
            FloatVector col = codes1.max(codes2);
            // rowStart = r * k - r * (r - 1) / 2, computed lane-wise in floats.
            FloatVector rowTimesK = row.mul(clusterCountVec);
            FloatVector skipped = row.mul(row.sub(one)).mul(half);
            FloatVector rowStart = rowTimesK.sub(skipped);
            rowStart.add(col.sub(row))
                    .add(laneBlockStarts)
                    .convertShape(VectorOperators.F2I, IntVector.SPECIES_512, 0)
                    .reinterpretAsInts()
                    .intoArray(gatherIndexes, 0);
            FloatVector gathered = this.fromVectorFloat(species, data, pos * blockSize, gatherIndexes, 0);
            acc = acc.add(gathered);
        }

        float total = acc.reduceLanes(VectorOperators.ADD);
        // pos now equals the largest multiple of the lane count not exceeding subspaceCount.
        for (; pos < subspaceCount; ++pos) {
            int first = Byte.toUnsignedInt(baseOffsets1.get(pos + baseOffsetsOffset1));
            int second = Byte.toUnsignedInt(baseOffsets2.get(pos + baseOffsetsOffset2));
            int r = Math.min(first, second);
            int c = Math.max(first, second);
            int indexInBlock = r * clusterCount - r * (r - 1) / 2 + (c - r);
            total += data.get(pos * blockSize + indexInBlock);
        }
        return total;
    }

    /**
     * Counts the bit positions in which {@code a} and {@code b} differ.
     * <p>
     * Only the first {@code a.length} words are compared; {@code b} must be at least that long.
     *
     * @param a first bit set, as packed 64-bit words
     * @param b second bit set, as packed 64-bit words
     * @return the number of differing bits, narrowed to {@code int}
     */
    @Override
    public int hammingDistance(long[] a, long[] b) {
        final VectorSpecies<Long> species = LongVector.SPECIES_PREFERRED;
        final int vectorEnd = species.loopBound(a.length);
        LongVector bitCounts = LongVector.zero(species);

        int pos = 0;
        while (pos < vectorEnd) {
            LongVector differing = LongVector.fromArray(species, a, pos)
                    .lanewise(VectorOperators.XOR, LongVector.fromArray(species, b, pos));
            bitCounts = bitCounts.add(differing.lanewise(VectorOperators.BIT_COUNT));
            pos += species.length();
        }

        int distance = (int) bitCounts.reduceLanes(VectorOperators.ADD);
        // Words that did not fill a whole vector.
        for (; pos < a.length; ++pos) {
            distance += Long.bitCount(a[pos] ^ b[pos]);
        }
        return distance;
    }

    /**
     * Returns the largest element of {@code v}.
     * <p>
     * The accumulator starts at {@code -Float.MAX_VALUE}, which is therefore the result for an
     * empty vector.
     *
     * @param v the vector to scan
     * @return the maximum element
     */
    @Override
    public float max(VectorFloat<?> v) {
        final VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        final int vectorEnd = species.loopBound(v.length());
        FloatVector laneMax = FloatVector.broadcast(species, -Float.MAX_VALUE);

        int pos = 0;
        while (pos < vectorEnd) {
            laneMax = laneMax.max(this.fromVectorFloat(species, v, pos));
            pos += species.length();
        }

        float best = laneMax.reduceLanes(VectorOperators.MAX);
        // Elements that did not fill a whole vector.
        for (; pos < v.length(); ++pos) {
            best = Math.max(best, v.get(pos));
        }
        return best;
    }

    /**
     * Returns the smallest element of {@code v}.
     * <p>
     * The accumulator starts at {@code Float.MAX_VALUE}, which is therefore the result for an
     * empty vector.
     *
     * @param v the vector to scan
     * @return the minimum element
     */
    @Override
    public float min(VectorFloat<?> v) {
        final VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        final int vectorEnd = species.loopBound(v.length());
        FloatVector laneMin = FloatVector.broadcast(species, Float.MAX_VALUE);

        int pos = 0;
        while (pos < vectorEnd) {
            laneMin = laneMin.min(this.fromVectorFloat(species, v, pos));
            pos += species.length();
        }

        float best = laneMin.reduceLanes(VectorOperators.MIN);
        // Elements that did not fill a whole vector.
        for (; pos < v.length(); ++pos) {
            best = Math.min(best, v.get(pos));
        }
        return best;
    }

    /**
     * Quantizes every partial to an unsigned 16-bit level and stores it little-endian.
     * <p>
     * {@code partials} is treated as {@code partialBases.length()} consecutive codebooks of equal
     * size. Each value is mapped to {@code (value - base) / delta}, clamped to {@code [0, 65535]} and
     * narrowed to a {@code short} (so levels above 32767 are stored as their 16-bit pattern).
     * <p>
     * The scalar remainder loop applies the same {@code [0, 65535]} clamp as the vector loop;
     * previously it only clamped the upper bound, so a value below its base wrapped around to a
     * large unsigned level instead of saturating at 0.
     *
     * @param delta             quantization step
     * @param partials          values to quantize, codebook after codebook
     * @param partialBases      one base value per codebook, subtracted before scaling
     * @param quantizedPartials destination; element {@code n} is written as the {@code n}-th short
     */
    @Override
    public void quantizePartials(float delta, VectorFloat<?> partials, VectorFloat<?> partialBases, ByteSequence<?> quantizedPartials) {
        int codebookSize = partials.length() / partialBases.length();
        int codebookCount = partialBases.length();
        // Loop invariants, hoisted out of the per-codebook / per-chunk loops.
        final int vectorizedLength = FloatVector.SPECIES_512.loopBound(codebookSize);
        final FloatVector lowerBound = FloatVector.zero(FloatVector.SPECIES_512);
        final FloatVector upperBound = FloatVector.broadcast(FloatVector.SPECIES_512, 65535.0f);
        for (int i = 0; i < codebookCount; ++i) {
            final int codebookStart = i * codebookSize;
            final float codebookBase = partialBases.get(i);
            final FloatVector codebookBaseVector = FloatVector.broadcast(FloatVector.SPECIES_512, codebookBase);
            int j = 0;
            for (; j < vectorizedLength; j += FloatVector.SPECIES_512.length()) {
                FloatVector partialVector = this.fromVectorFloat(FloatVector.SPECIES_512, partials, codebookStart + j);
                FloatVector quantized = partialVector.sub(codebookBaseVector).div(delta).max(lowerBound).min(upperBound);
                ShortVector quantizedShorts = (ShortVector) quantized.convertShape(VectorOperators.F2S, ShortVector.SPECIES_256, 0);
                // Destination index is in bytes here, hence the factor of two.
                this.intoByteSequence(quantizedShorts.reinterpretAsBytes(), quantizedPartials, 2 * (codebookStart + j));
            }
            for (; j < codebookSize; ++j) {
                float val = partials.get(codebookStart + j);
                // Same clamp as the vector path: saturate at 0 and at 65535.
                float clamped = Math.max(0.0f, Math.min((val - codebookBase) / delta, 65535.0f));
                quantizedPartials.setLittleEndianShort(codebookStart + j, (short) clamped);
            }
        }
    }

    /**
     * Dispatches to the {@code pqDecodedCosineSimilarity*} implementation matching the preferred
     * vector width.
     *
     * @throws IllegalStateException if {@code PREFERRED_BIT_SIZE} is not 512, 256 or 128
     */
    @Override
    public float pqDecodedCosineSimilarity(ByteSequence<?> encoded, int encodedOffset, int encodedLength, int clusterCount, VectorFloat<?> partialSums, VectorFloat<?> aMagnitude, float bMagnitude) {
        switch (PREFERRED_BIT_SIZE) {
            case 512:
                return this.pqDecodedCosineSimilarity512(encoded, encodedOffset, encodedLength, clusterCount, partialSums, aMagnitude, bMagnitude);
            case 256:
                return this.pqDecodedCosineSimilarity256(encoded, encodedOffset, encodedLength, clusterCount, partialSums, aMagnitude, bMagnitude);
            case 128:
                return this.pqDecodedCosineSimilarity128(encoded, encodedOffset, encodedLength, clusterCount, partialSums, aMagnitude, bMagnitude);
            default:
                throw new IllegalStateException("Unsupported vector width: " + PREFERRED_BIT_SIZE);
        }
    }

    /**
     * 512-bit implementation of the PQ-decoded cosine similarity.
     * <p>
     * For every position {@code p} in {@code [0, baseOffsetsLength)} the index
     * {@code clusterCount * p + u(p)} is formed, where {@code u(p)} is the unsigned value of the code
     * byte at {@code p + baseOffsetsOffset}; the entries of {@code partialSums} and
     * {@code aMagnitude} at that index are accumulated separately. The result is
     * {@code sum / sqrt(aMagnitudeSum * bMagnitude)}.
     * <p>
     * The loop index is initialised before the loop bound is derived; the decompiled form read the
     * index while it was still unassigned, which does not compile.
     *
     * @param baseOffsets       code bytes, one per position
     * @param baseOffsetsOffset index of the first code to use
     * @param baseOffsetsLength number of codes (positions) to process
     * @param clusterCount      stride (in elements) between consecutive positions' sub-tables
     * @param partialSums       table of numerator contributions
     * @param aMagnitude        table of magnitude contributions, indexed like {@code partialSums}
     * @param bMagnitude        magnitude term of the other vector
     * @return the similarity value
     */
    float pqDecodedCosineSimilarity512(ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength, int clusterCount, VectorFloat<?> partialSums, VectorFloat<?> aMagnitude, float bMagnitude) {
        FloatVector sum = FloatVector.zero(FloatVector.SPECIES_512);
        FloatVector vaMagnitude = FloatVector.zero(FloatVector.SPECIES_512);
        int[] convOffsets = scratchInt512.get();
        int i = 0;
        int limit = ByteVector.SPECIES_128.loopBound(baseOffsetsLength);
        // Lane j holds j * clusterCount: the start of the j-th sub-table relative to the chunk.
        IntVector scale = IntVector.zero(IntVector.SPECIES_512).addIndex(clusterCount);
        for (; i < limit; i += ByteVector.SPECIES_128.length()) {
            // Widen bytes to ints, strip the sign extension, add the per-lane sub-table start.
            this.fromByteSequence(ByteVector.SPECIES_128, baseOffsets, i + baseOffsets.offset() + baseOffsetsOffset)
                    .convertShape(VectorOperators.B2I, IntVector.SPECIES_512, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_512)
                    .reinterpretAsInts()
                    .add(scale)
                    .intoArray(convOffsets, 0);
            int offset = i * clusterCount;
            sum = sum.add(this.fromVectorFloat(FloatVector.SPECIES_512, partialSums, offset, convOffsets, 0));
            vaMagnitude = vaMagnitude.add(this.fromVectorFloat(FloatVector.SPECIES_512, aMagnitude, offset, convOffsets, 0));
        }
        float sumResult = sum.reduceLanes(VectorOperators.ADD);
        float aMagnitudeResult = vaMagnitude.reduceLanes(VectorOperators.ADD);
        // Positions that did not fill a whole vector.
        for (; i < baseOffsetsLength; ++i) {
            int offset = clusterCount * i + Byte.toUnsignedInt(baseOffsets.get(i + baseOffsetsOffset));
            sumResult += partialSums.get(offset);
            aMagnitudeResult += aMagnitude.get(offset);
        }
        return (float)((double)sumResult / Math.sqrt(aMagnitudeResult * bMagnitude));
    }

    /**
     * 256-bit implementation of the PQ-decoded cosine similarity.
     * <p>
     * For every position {@code p} in {@code [0, baseOffsetsLength)} the index
     * {@code clusterCount * p + u(p)} is formed, where {@code u(p)} is the unsigned value of the code
     * byte at {@code p + baseOffsetsOffset}; the entries of {@code partialSums} and
     * {@code aMagnitude} at that index are accumulated separately. The result is
     * {@code sum / sqrt(aMagnitudeSum * bMagnitude)}.
     *
     * @param baseOffsets       code bytes, one per position
     * @param baseOffsetsOffset index of the first code to use
     * @param baseOffsetsLength number of codes (positions) to process
     * @param clusterCount      stride (in elements) between consecutive positions' sub-tables
     * @param partialSums       table of numerator contributions
     * @param aMagnitude        table of magnitude contributions, indexed like {@code partialSums}
     * @param bMagnitude        magnitude term of the other vector
     * @return the similarity value
     */
    float pqDecodedCosineSimilarity256(ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength, int clusterCount, VectorFloat<?> partialSums, VectorFloat<?> aMagnitude, float bMagnitude) {
        FloatVector dotAcc = FloatVector.zero(FloatVector.SPECIES_256);
        FloatVector magnitudeAcc = FloatVector.zero(FloatVector.SPECIES_256);
        final int[] gatherIndexes = scratchInt256.get();
        final int stride = ByteVector.SPECIES_64.length();
        final int vectorEnd = ByteVector.SPECIES_64.loopBound(baseOffsetsLength);
        // Lane j holds j * clusterCount: the start of the j-th sub-table relative to the chunk.
        final IntVector laneBases = IntVector.zero(IntVector.SPECIES_256).addIndex(clusterCount);

        int pos = 0;
        while (pos < vectorEnd) {
            ByteVector codes = this.fromByteSequence(ByteVector.SPECIES_64, baseOffsets, pos + baseOffsets.offset() + baseOffsetsOffset);
            codes.convertShape(VectorOperators.B2I, IntVector.SPECIES_256, 0)
                    .lanewise(VectorOperators.AND, BYTE_TO_INT_MASK_256)
                    .reinterpretAsInts()
                    .add(laneBases)
                    .intoArray(gatherIndexes, 0);
            final int chunkStart = pos * clusterCount;
            dotAcc = dotAcc.add(this.fromVectorFloat(FloatVector.SPECIES_256, partialSums, chunkStart, gatherIndexes, 0));
            magnitudeAcc = magnitudeAcc.add(this.fromVectorFloat(FloatVector.SPECIES_256, aMagnitude, chunkStart, gatherIndexes, 0));
            pos += stride;
        }

        float dot = dotAcc.reduceLanes(VectorOperators.ADD);
        float magnitude = magnitudeAcc.reduceLanes(VectorOperators.ADD);
        // Positions that did not fill a whole vector.
        for (; pos < baseOffsetsLength; ++pos) {
            int index = clusterCount * pos + Byte.toUnsignedInt(baseOffsets.get(pos + baseOffsetsOffset));
            dot += partialSums.get(index);
            magnitude += aMagnitude.get(index);
        }
        return (float) ((double) dot / Math.sqrt(magnitude * bMagnitude));
    }

    /**
     * Scalar implementation of the PQ-decoded cosine similarity, used for 128-bit preferred vectors.
     * <p>
     * Accumulates {@code partialSums} and {@code aMagnitude} at index
     * {@code p * clusterCount + u(p)} for every position {@code p}, where {@code u(p)} is the unsigned
     * code byte at {@code p + baseOffsetsOffset}, and returns
     * {@code sum / sqrt(aMagnitudeSum * bMagnitude)}.
     *
     * @param baseOffsets       code bytes, one per position
     * @param baseOffsetsOffset index of the first code to use
     * @param baseOffsetsLength number of codes (positions) to process
     * @param clusterCount      stride (in elements) between consecutive positions' sub-tables
     * @param partialSums       table of numerator contributions
     * @param aMagnitude        table of magnitude contributions, indexed like {@code partialSums}
     * @param bMagnitude        magnitude term of the other vector
     * @return the similarity value
     */
    float pqDecodedCosineSimilarity128(ByteSequence<?> baseOffsets, int baseOffsetsOffset, int baseOffsetsLength, int clusterCount, VectorFloat<?> partialSums, VectorFloat<?> aMagnitude, float bMagnitude) {
        float dot = 0.0f;
        float magnitude = 0.0f;
        int pos = 0;
        while (pos < baseOffsetsLength) {
            int code = Byte.toUnsignedInt(baseOffsets.get(pos + baseOffsetsOffset));
            int tableIndex = pos * clusterCount + code;
            dot += partialSums.get(tableIndex);
            magnitude += aMagnitude.get(tableIndex);
            ++pos;
        }
        return (float) ((double) dot / Math.sqrt(magnitude * bMagnitude));
    }

    /**
     * Lane-wise fast logistic transform.
     * <p>
     * With {@code t = alpha * (v - x0)}, builds {@code y}, a piecewise-linear stand-in for
     * {@code 2^t}: an integer part {@code p} is added directly into the float exponent field of
     * {@code 1 + 0.5 * (t - p)}. The result is {@code y / (y + 1)}.
     *
     * @param vector input values
     * @param alpha  scale applied to the input
     * @param x0     shift applied to the input
     * @return the transformed lanes
     */
    FloatVector logisticNQT(FloatVector vector, float alpha, float x0) {
        final FloatVector scaled = vector.fma(alpha, -alpha * x0);
        // F2I truncates toward zero, so 1 is added only on non-negative lanes before converting.
        final VectorMask<Float> nonNegative = scaled.test(VectorOperators.IS_NEGATIVE).not();
        final IntVector exponent = scaled.add(1.0f, nonNegative).convert(VectorOperators.F2I, 0).reinterpretAsInts();
        final FloatVector exponentAsFloat = exponent.convert(VectorOperators.I2F, 0).reinterpretAsFloats();
        final IntVector mantissaBits = scaled.sub(exponentAsFloat).fma(0.5f, 1.0f).reinterpretAsInts();
        // Shift the integer part into the exponent field (bit 23 upward) and add it to the raw bits.
        final FloatVector pow = mantissaBits.add(exponent.lanewise(VectorOperators.LSHL, 23)).reinterpretAsFloats();
        return pow.div(pow.add(1.0f));
    }

    /**
     * Scalar fast logistic transform.
     * <p>
     * With {@code t = alpha * (value - x0)} and {@code p = floor(t + 1)}, builds {@code y} by adding
     * {@code p} into the float exponent field of {@code 1 + 0.5 * (t - p)} (a piecewise-linear
     * stand-in for {@code 2^t}) and returns {@code y / (y + 1)}.
     *
     * @param value input value
     * @param alpha scale applied to the input
     * @param x0    shift applied to the input
     * @return the transformed value
     */
    float logisticNQT(float value, float alpha, float x0) {
        final float scaled = Math.fma(value, alpha, -alpha * x0);
        final int exponent = (int) Math.floor(scaled + 1.0f);
        final float mantissa = Math.fma(scaled - (float) exponent, 0.5f, 1.0f);
        // Shift the integer part into the exponent field (bit 23 upward) and add it to the raw bits.
        final float pow = Float.intBitsToFloat(Float.floatToIntBits(mantissa) + (exponent << 23));
        return pow / (pow + 1.0f);
    }

    /**
     * Lane-wise fast logit transform.
     * <p>
     * Forms {@code z = v / (1 - v)}, then splits the raw bits of {@code z}: the exponent field
     * (minus 128) becomes {@code p}, and the mantissa bits re-based onto the bit pattern of
     * {@code 1.0f} become {@code m}. The result is {@code (m + p) * inverseAlpha + x0}.
     *
     * @param vector       input values
     * @param inverseAlpha scale applied to the output
     * @param x0           shift applied to the output
     * @return the transformed lanes
     */
    FloatVector logitNQT(FloatVector vector, float inverseAlpha, float x0) {
        final FloatVector odds = vector.div(const1f.sub(vector));
        final IntVector bits = odds.reinterpretAsInts();
        // 0x7F800000 selects the exponent field; 0x3F800000 is the bit pattern of 1.0f.
        final FloatVector exponent = bits.and(0x7F800000)
                .lanewise(VectorOperators.LSHR, 23)
                .sub(128)
                .convert(VectorOperators.I2F, 0)
                .reinterpretAsFloats();
        final FloatVector mantissa = bits.lanewise(VectorOperators.AND, 0x7FFFFF)
                .add(0x3F800000)
                .reinterpretAsFloats();
        return mantissa.add(exponent).fma(inverseAlpha, x0);
    }

    /**
     * Scalar fast logit transform.
     * <p>
     * Forms {@code z = value / (1 - value)}, then splits the raw bits of {@code z}: the exponent
     * field (minus 128) becomes {@code p}, and the mantissa bits re-based onto the bit pattern of
     * {@code 1.0f} become {@code m}. The result is {@code (m + p) * inverseAlpha + x0}.
     *
     * @param value        input value
     * @param inverseAlpha scale applied to the output
     * @param x0           shift applied to the output
     * @return the transformed value
     */
    float logitNQT(float value, float inverseAlpha, float x0) {
        final float odds = value / (1.0f - value);
        final int bits = Float.floatToIntBits(odds);
        // 0x7F800000 selects the exponent field; 0x3F800000 is the bit pattern of 1.0f.
        final float exponent = ((bits & 0x7F800000) >> 23) - 128;
        final float mantissa = Float.intBitsToFloat((bits & 0x7FFFFF) + 0x3F800000);
        return Math.fma(mantissa + exponent, inverseAlpha, x0);
    }

    /**
     * Dequantizes one byte out of every 32-bit group of {@code bytes}.
     * <p>
     * The byte vector is viewed as ints; byte number {@code part} of each int is extracted, mapped
     * to {@code level * logisticScale + logisticBias} and passed through the vector
     * {@code logitNQT}.
     *
     * @param bytes         packed quantized values
     * @param inverseAlpha  forwarded to {@code logitNQT}
     * @param x0            forwarded to {@code logitNQT}
     * @param logisticScale scale applied to the extracted level
     * @param logisticBias  bias added to the scaled level
     * @param part          which byte of each int to extract (shift of {@code 8 * part} bits)
     * @return the dequantized lanes
     */
    FloatVector nvqDequantize8bit(ByteVector bytes, float inverseAlpha, float x0, float logisticScale, float logisticBias, int part) {
        final IntVector packed = bytes.reinterpretAsInts();
        final IntVector selected = packed.lanewise(VectorOperators.LSHR, 8 * part).lanewise(VectorOperators.AND, 255);
        final FloatVector levels = selected.convert(VectorOperators.I2F, 0).reinterpretAsFloats();
        final FloatVector logisticValues = levels.fma(logisticScale, logisticBias);
        return this.logitNQT(logisticValues, inverseAlpha, x0);
    }

    /**
     * Quantizes {@code vector} to 8-bit levels and writes one byte per element into
     * {@code destination}.
     * <p>
     * Each element is passed through {@code logisticNQT} (with {@code alpha} divided by, and
     * {@code x0} multiplied by, {@code maxValue - minValue}), shifted by the transform of
     * {@code minValue}, and scaled so that the transform of {@code maxValue} maps to 255. The vector
     * path rounds by adding 0.5 before the float-to-byte conversion; the scalar remainder uses
     * {@code Math.round}.
     *
     * @param vector      values to quantize
     * @param alpha       logistic scale parameter
     * @param x0          logistic shift parameter
     * @param minValue    value mapped to level 0
     * @param maxValue    value mapped to level 255
     * @param destination receives the quantized bytes, element {@code n} at index {@code n}
     */
    @Override
    public void nvqQuantize8bit(VectorFloat<?> vector, float alpha, float x0, float minValue, float maxValue, ByteSequence<?> destination) {
        final VectorSpecies<Float> floatSpecies = FloatVector.SPECIES_PREFERRED;
        final int lanes = floatSpecies.length();
        final int vectorEnd = floatSpecies.loopBound(vector.length());
        // Only the first `lanes` bytes of the converted byte vector carry data.
        final VectorMask<Byte> storeMask = ByteVector.SPECIES_PREFERRED.indexInRange(0, lanes);

        final float range = maxValue - minValue;
        final float scaledAlpha = alpha / range;
        final float scaledX0 = x0 * range;
        final float logisticBias = this.logisticNQT(minValue, scaledAlpha, scaledX0);
        final float invLogisticScale = 255.0f / (this.logisticNQT(maxValue, scaledAlpha, scaledX0) - logisticBias);

        int pos = 0;
        while (pos < vectorEnd) {
            FloatVector transformed = this.logisticNQT(this.fromVectorFloat(floatSpecies, vector, pos), scaledAlpha, scaledX0);
            FloatVector levels = transformed.sub(logisticBias).mul(invLogisticScale);
            ByteVector packed = levels.add(const05f)
                    .convertShape(VectorOperators.F2B, ByteVector.SPECIES_PREFERRED, 0)
                    .reinterpretAsBytes();
            this.intoByteSequence(packed, destination, pos, storeMask);
            pos += lanes;
        }

        // Elements that did not fill a whole vector.
        for (; pos < vector.length(); ++pos) {
            float transformed = this.logisticNQT(vector.get(pos), scaledAlpha, scaledX0);
            float level = (transformed - logisticBias) * invLogisticScale;
            destination.set(pos, (byte) Math.round(level));
        }
    }

    /**
     * Computes the squared reconstruction error of quantizing {@code vector} with
     * {@code (1 << nBits) - 1} levels through the logistic/logit transform pair.
     * <p>
     * Every element is transformed with {@code logisticNQT}, mapped to a level, rounded, mapped back
     * and inverted with {@code logitNQT}; the squared differences to the original are summed.
     *
     * @param vector   values to evaluate
     * @param alpha    logistic scale parameter
     * @param x0       logistic shift parameter
     * @param minValue value mapped to the lowest level
     * @param maxValue value mapped to the highest level
     * @param nBits    number of bits per quantized value
     * @return the sum of squared reconstruction errors
     */
    @Override
    public float nvqLoss(VectorFloat<?> vector, float alpha, float x0, float minValue, float maxValue, int nBits) {
        final int maxLevel = (1 << nBits) - 1;
        final VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        final int vectorEnd = species.loopBound(vector.length());

        final float range = maxValue - minValue;
        final float scaledAlpha = alpha / range;
        final float invScaledAlpha = 1.0f / scaledAlpha;
        final float scaledX0 = x0 * range;
        final float logisticBias = this.logisticNQT(minValue, scaledAlpha, scaledX0);
        final float logisticScale = (this.logisticNQT(maxValue, scaledAlpha, scaledX0) - logisticBias) / (float) maxLevel;
        final float invLogisticScale = 1.0f / logisticScale;

        FloatVector errorAcc = FloatVector.zero(species);
        int pos = 0;
        while (pos < vectorEnd) {
            FloatVector original = this.fromVectorFloat(species, vector, pos);
            FloatVector level = this.logisticNQT(original, scaledAlpha, scaledX0).sub(logisticBias).mul(invLogisticScale);
            // Round to the nearest level: add 0.5, truncate to int, convert back to float.
            FloatVector rounded = level.add(const05f)
                    .convert(VectorOperators.F2I, 0)
                    .reinterpretAsInts()
                    .convert(VectorOperators.I2F, 0)
                    .reinterpretAsFloats();
            FloatVector reconstructed = this.logitNQT(rounded.fma(logisticScale, logisticBias), invScaledAlpha, scaledX0);
            FloatVector error = original.sub(reconstructed);
            errorAcc = error.fma(error, errorAcc);
            pos += species.length();
        }

        float loss = errorAcc.reduceLanes(VectorOperators.ADD);
        // Elements that did not fill a whole vector.
        for (; pos < vector.length(); ++pos) {
            float original = vector.get(pos);
            float level = (this.logisticNQT(original, scaledAlpha, scaledX0) - logisticBias) * invLogisticScale;
            float rounded = Math.round(level);
            float reconstructed = this.logitNQT(Math.fma(logisticScale, rounded, logisticBias), invScaledAlpha, scaledX0);
            loss += MathUtil.square(original - reconstructed);
        }
        return loss;
    }

    /**
     * Computes the squared reconstruction error of uniformly quantizing {@code vector} with
     * {@code (1 << nBits) - 1} levels between {@code minValue} and {@code maxValue}.
     *
     * @param vector   values to evaluate
     * @param minValue value mapped to the lowest level
     * @param maxValue value mapped to the highest level
     * @param nBits    number of bits per quantized value
     * @return the sum of squared reconstruction errors
     */
    @Override
    public float nvqUniformLoss(VectorFloat<?> vector, float minValue, float maxValue, int nBits) {
        final float maxLevel = (1 << nBits) - 1;
        final float range = maxValue - minValue;
        final float toLevel = maxLevel / range;
        final float fromLevel = range / maxLevel;
        final VectorSpecies<Float> species = FloatVector.SPECIES_PREFERRED;
        final int vectorEnd = species.loopBound(vector.length());

        FloatVector errorAcc = FloatVector.zero(species);
        int pos = 0;
        while (pos < vectorEnd) {
            FloatVector original = this.fromVectorFloat(species, vector, pos);
            FloatVector level = original.sub(minValue).mul(toLevel);
            // Round to the nearest level: add 0.5, truncate to int, convert back to float.
            FloatVector rounded = level.add(const05f)
                    .convert(VectorOperators.F2I, 0)
                    .reinterpretAsInts()
                    .convert(VectorOperators.I2F, 0)
                    .reinterpretAsFloats();
            FloatVector reconstructed = rounded.fma(fromLevel, minValue);
            FloatVector error = original.sub(reconstructed);
            errorAcc = error.fma(error, errorAcc);
            pos += species.length();
        }

        float loss = errorAcc.reduceLanes(VectorOperators.ADD);
        // Elements that did not fill a whole vector.
        for (; pos < vector.length(); ++pos) {
            float original = vector.get(pos);
            float normalized = (original - minValue) / range;
            float snapped = (float) Math.round(maxLevel * normalized) / maxLevel;
            float reconstructed = snapped * range + minValue;
            loss += MathUtil.square(original - reconstructed);
        }
        return loss;
    }

    /**
     * Computes the squared L2 distance between {@code vector} and the dequantized form of
     * {@code quantizedVector}.
     * <p>
     * In the vector loop one byte vector is loaded per step and dequantized in four parts (byte
     * {@code j} of every 32-bit group); part {@code j} is paired with the float vector at
     * {@code i + floatLanes * j}. Callers presumably pre-arrange {@code vector} to match that
     * pairing — confirm against the call site. The remainder is processed element by element.
     *
     * @param vector          float vector
     * @param quantizedVector 8-bit quantized vector
     * @param alpha           logistic scale parameter
     * @param x0              logistic shift parameter
     * @param minValue        value mapped to level 0
     * @param maxValue        value mapped to level 255
     * @return the sum of squared differences
     */
    @Override
    public float nvqSquareL2Distance8bit(VectorFloat<?> vector, ByteSequence<?> quantizedVector, float alpha, float x0, float minValue, float maxValue) {
        final VectorSpecies<Float> floatSpecies = FloatVector.SPECIES_PREFERRED;
        final VectorSpecies<Byte> byteSpecies = ByteVector.SPECIES_PREFERRED;
        final int vectorEnd = byteSpecies.loopBound(quantizedVector.length());
        final int floatLanes = floatSpecies.length();

        final float range = maxValue - minValue;
        final float scaledAlpha = alpha / range;
        final float invScaledAlpha = 1.0f / scaledAlpha;
        final float scaledX0 = x0 * range;
        final float logisticBias = this.logisticNQT(minValue, scaledAlpha, scaledX0);
        final float logisticScale = (this.logisticNQT(maxValue, scaledAlpha, scaledX0) - logisticBias) / 255.0f;

        FloatVector errorAcc = FloatVector.zero(floatSpecies);
        int base = 0;
        while (base < vectorEnd) {
            ByteVector packed = this.fromByteSequence(byteSpecies, quantizedVector, base);
            for (int part = 0; part < 4; ++part) {
                FloatVector query = this.fromVectorFloat(floatSpecies, vector, base + floatLanes * part);
                FloatVector decoded = this.nvqDequantize8bit(packed, invScaledAlpha, scaledX0, logisticScale, logisticBias, part);
                FloatVector error = query.sub(decoded);
                errorAcc = error.fma(error, errorAcc);
            }
            base += byteSpecies.length();
        }

        float distance = errorAcc.reduceLanes(VectorOperators.ADD);
        // Elements that did not fill a whole byte vector.
        for (int pos = vectorEnd; pos < quantizedVector.length(); ++pos) {
            float level = Byte.toUnsignedInt(quantizedVector.get(pos));
            float decoded = this.logitNQT(Math.fma(logisticScale, level, logisticBias), invScaledAlpha, scaledX0);
            float error = vector.get(pos) - decoded;
            distance += MathUtil.square(error);
        }
        return distance;
    }

    /**
     * Computes the dot product between {@code vector} and the dequantized form of
     * {@code quantizedVector}.
     * <p>
     * In the vector loop one byte vector is loaded per step and dequantized in four parts (byte
     * {@code j} of every 32-bit group); part {@code j} is paired with the float vector at
     * {@code i + floatLanes * j}. Callers presumably pre-arrange {@code vector} to match that
     * pairing — confirm against the call site. The remainder is processed element by element.
     *
     * @param vector          float vector
     * @param quantizedVector 8-bit quantized vector
     * @param alpha           logistic scale parameter
     * @param x0              logistic shift parameter
     * @param minValue        value mapped to level 0
     * @param maxValue        value mapped to level 255
     * @return the dot product
     */
    @Override
    public float nvqDotProduct8bit(VectorFloat<?> vector, ByteSequence<?> quantizedVector, float alpha, float x0, float minValue, float maxValue) {
        final VectorSpecies<Float> floatSpecies = FloatVector.SPECIES_PREFERRED;
        final VectorSpecies<Byte> byteSpecies = ByteVector.SPECIES_PREFERRED;
        final int vectorEnd = byteSpecies.loopBound(quantizedVector.length());
        final int floatLanes = floatSpecies.length();

        final float range = maxValue - minValue;
        final float scaledAlpha = alpha / range;
        final float invScaledAlpha = 1.0f / scaledAlpha;
        final float scaledX0 = x0 * range;
        final float logisticBias = this.logisticNQT(minValue, scaledAlpha, scaledX0);
        final float logisticScale = (this.logisticNQT(maxValue, scaledAlpha, scaledX0) - logisticBias) / 255.0f;

        FloatVector dotAcc = FloatVector.zero(floatSpecies);
        int base = 0;
        while (base < vectorEnd) {
            ByteVector packed = this.fromByteSequence(byteSpecies, quantizedVector, base);
            for (int part = 0; part < 4; ++part) {
                FloatVector query = this.fromVectorFloat(floatSpecies, vector, base + floatLanes * part);
                FloatVector decoded = this.nvqDequantize8bit(packed, invScaledAlpha, scaledX0, logisticScale, logisticBias, part);
                dotAcc = query.fma(decoded, dotAcc);
            }
            base += byteSpecies.length();
        }

        float dot = dotAcc.reduceLanes(VectorOperators.ADD);
        // Elements that did not fill a whole byte vector.
        for (int pos = vectorEnd; pos < quantizedVector.length(); ++pos) {
            float level = Byte.toUnsignedInt(quantizedVector.get(pos));
            float decoded = this.logitNQT(Math.fma(logisticScale, level, logisticBias), invScaledAlpha, scaledX0);
            dot = Math.fma(vector.get(pos), decoded, dot);
        }
        return dot;
    }

    /**
     * Computes the two ingredients of a cosine similarity between {@code vector} and the
     * dequantized {@code quantizedVector} shifted by {@code centroid}.
     * <p>
     * With {@code b = dequantized + centroid}, element 0 of the result is the dot product of
     * {@code vector} and {@code b}, and element 1 is the squared magnitude of {@code b}. In the
     * vector loop part {@code j} of each dequantized byte vector is paired with the float vectors at
     * {@code i + floatLanes * j}.
     *
     * @param vector          float vector
     * @param quantizedVector 8-bit quantized vector
     * @param alpha           logistic scale parameter
     * @param x0              logistic shift parameter
     * @param minValue        value mapped to level 0
     * @param maxValue        value mapped to level 255
     * @param centroid        vector added to the dequantized values
     * @return {@code {dotProduct, squaredMagnitudeOfB}}
     * @throws IllegalArgumentException if {@code vector} and {@code centroid} differ in length
     */
    @Override
    public float[] nvqCosine8bit(VectorFloat<?> vector, ByteSequence<?> quantizedVector, float alpha, float x0, float minValue, float maxValue, VectorFloat<?> centroid) {
        if (vector.length() != centroid.length()) {
            throw new IllegalArgumentException("Vectors must have the same length");
        }

        final float range = maxValue - minValue;
        final float scaledAlpha = alpha / range;
        final float invScaledAlpha = 1.0f / scaledAlpha;
        final float scaledX0 = x0 * range;
        final float logisticBias = this.logisticNQT(minValue, scaledAlpha, scaledX0);
        final float logisticScale = (this.logisticNQT(maxValue, scaledAlpha, scaledX0) - logisticBias) / 255.0f;

        final VectorSpecies<Float> floatSpecies = FloatVector.SPECIES_PREFERRED;
        final VectorSpecies<Byte> byteSpecies = ByteVector.SPECIES_PREFERRED;
        final int vectorEnd = byteSpecies.loopBound(vector.length());
        final int floatLanes = floatSpecies.length();

        FloatVector dotAcc = FloatVector.zero(floatSpecies);
        FloatVector magnitudeAcc = FloatVector.zero(floatSpecies);
        int base = 0;
        while (base < vectorEnd) {
            ByteVector packed = this.fromByteSequence(byteSpecies, quantizedVector, base);
            for (int part = 0; part < 4; ++part) {
                final int floatOffset = base + floatLanes * part;
                FloatVector query = this.fromVectorFloat(floatSpecies, vector, floatOffset);
                FloatVector decoded = this.nvqDequantize8bit(packed, invScaledAlpha, scaledX0, logisticScale, logisticBias, part);
                FloatVector shifted = decoded.add(this.fromVectorFloat(floatSpecies, centroid, floatOffset));
                dotAcc = query.fma(shifted, dotAcc);
                magnitudeAcc = shifted.fma(shifted, magnitudeAcc);
            }
            base += byteSpecies.length();
        }

        float dot = dotAcc.reduceLanes(VectorOperators.ADD);
        float magnitude = magnitudeAcc.reduceLanes(VectorOperators.ADD);
        // Elements that did not fill a whole byte vector.
        for (int pos = vectorEnd; pos < vector.length(); ++pos) {
            float level = Byte.toUnsignedInt(quantizedVector.get(pos));
            float logisticValue = Math.fma(logisticScale, level, logisticBias);
            float shifted = this.logitNQT(logisticValue, invScaledAlpha, scaledX0) + centroid.get(pos);
            dot = Math.fma(vector.get(pos), shifted, dot);
            magnitude = Math.fma(shifted, shifted, magnitude);
        }
        return new float[]{dot, magnitude};
    }

    /**
     * Permutes the elements of {@code arr} in the half-open range {@code [first, last)} in place
     * by following permutation cycles.
     *
     * <p>Let {@code size = last - first} and {@code n = size / nRows}. The element at relative
     * index {@code p} is moved to relative index {@code (n * p) % (size - 1)}; the elements at
     * relative indices {@code 0} and {@code size - 1} keep their positions. When {@code size} is
     * a multiple of {@code nRows}, this means the element originally at relative index
     * {@code k * nRows + j} (with {@code 0 <= j < nRows}) ends up at {@code j * n + k}.
     *
     * @param arr   the vector whose elements are rearranged
     * @param first index of the first element of the range (inclusive)
     * @param last  index just past the final element of the range (exclusive)
     * @param nRows divisor used to derive the stride {@code n} of the permutation
     */
    void transpose(VectorFloat<?> arr, int first, int last, int nRows) {
        final int span = last - first;
        final int lastRel = span - 1;
        final int stride = span / nRows;
        // One flag per element of the range, set once the element sits in its final slot.
        final boolean[] placed = new boolean[span];
        // Relative index 0 never moves, so the scan for cycle starts begins at first + 1.
        for (int start = first + 1; start != last; ++start) {
            if (placed[start - first]) {
                continue;
            }
            int rel = start - first;
            do {
                // The last element is a fixed point; every other index maps through the stride.
                if (rel != lastRel) {
                    rel = (stride * rel) % lastRel;
                }
                // Swap through the cycle's start slot: it always carries the value still in flight.
                final float displaced = arr.get(first + rel);
                arr.set(first + rel, arr.get(start));
                arr.set(start, displaced);
                placed[rel] = true;
            } while (first + rel != start);
        }
    }

    /**
     * Rearranges {@code vector} in place, one block at a time, by applying
     * {@link #transpose(VectorFloat, int, int, int)} with {@code nRows = 4} to each consecutive
     * block of {@code 4 * FloatVector.SPECIES_PREFERRED.length()} floats.
     *
     * <p>Only complete blocks that fit below the preferred float species' loop bound are
     * processed; any remaining trailing elements are left where they are.
     *
     * @param vector the query vector to shuffle in place
     */
    @Override
    public void nvqShuffleQueryInPlace8bit(VectorFloat<?> vector) {
        final int rowsPerBlock = 4;
        final int limit = FloatVector.SPECIES_PREFERRED.loopBound(vector.length());
        final int blockSize = FloatVector.SPECIES_PREFERRED.length() * rowsPerBlock;
        for (int start = 0; start + blockSize <= limit; start += blockSize) {
            this.transpose(vector, start, start + blockSize, rowsPerBlock);
        }
    }

    /**
     * Fills one codebook's slice of {@code partialSums} with the similarity between the query
     * fragment and each cluster entry of {@code codebook}.
     *
     * <p>For every {@code i} in {@code [0, clusterCount)} the value computed by
     * {@code dotProduct} (for {@code DOT_PRODUCT}) or {@code squareDistance} (for
     * {@code EUCLIDEAN}), called with codebook offset {@code i * size}, query offset
     * {@code queryOffset} and {@code size} as the final argument, is stored at
     * {@code partialSums[codebookIndex * clusterCount + i]}.
     *
     * @param codebook      vector holding the cluster entries, {@code size} floats apart
     * @param codebookIndex index of this codebook; selects the destination slice
     * @param size          value passed as the last argument of the similarity helpers
     *                      and used as the spacing between cluster entries
     * @param clusterCount  number of cluster entries to evaluate
     * @param query         the query vector
     * @param queryOffset   offset into {@code query} passed to the similarity helpers
     * @param vsf           similarity function; only {@code DOT_PRODUCT} and {@code EUCLIDEAN}
     *                      are handled
     * @param partialSums   destination for the computed values
     * @throws UnsupportedOperationException if {@code vsf} is any other function and
     *         {@code clusterCount} is positive
     */
    @Override
    public void calculatePartialSums(VectorFloat<?> codebook, int codebookIndex, int size, int clusterCount, VectorFloat<?> query, int queryOffset, VectorSimilarityFunction vsf, VectorFloat<?> partialSums) {
        final int base = codebookIndex * clusterCount;
        for (int cluster = 0; cluster < clusterCount; cluster++) {
            final int entryOffset = cluster * size;
            final float partial;
            // The switch stays inside the loop so that an empty codebook never throws.
            switch (vsf) {
                case DOT_PRODUCT:
                    partial = this.dotProduct(codebook, entryOffset, query, queryOffset, size);
                    break;
                case EUCLIDEAN:
                    partial = this.squareDistance(codebook, entryOffset, query, queryOffset, size);
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported similarity function " + vsf);
            }
            partialSums.set(base + cluster, partial);
        }
    }

    /**
     * Fills one codebook's slice of {@code partialSums} exactly like the eight-argument
     * overload and additionally records the best value seen for this codebook.
     *
     * <p>"Best" is the minimum of the computed values for {@code EUCLIDEAN} and the maximum for
     * {@code DOT_PRODUCT}. The running best starts at {@code Float.MAX_VALUE} for
     * {@code EUCLIDEAN} and at {@code -Float.MAX_VALUE} otherwise, so that starting value is
     * what gets stored when {@code clusterCount} is zero.
     *
     * @param codebook      vector holding the cluster entries, {@code size} floats apart
     * @param codebookIndex index of this codebook; selects the destination slice and the slot
     *                      written in {@code partialBest}
     * @param size          value passed as the last argument of the similarity helpers
     *                      and used as the spacing between cluster entries
     * @param clusterCount  number of cluster entries to evaluate
     * @param query         the query vector
     * @param queryOffset   offset into {@code query} passed to the similarity helpers
     * @param vsf           similarity function; only {@code DOT_PRODUCT} and {@code EUCLIDEAN}
     *                      are handled
     * @param partialSums   destination for the per-cluster values
     * @param partialBest   receives the best value at index {@code codebookIndex}
     * @throws UnsupportedOperationException if {@code vsf} is any other function and
     *         {@code clusterCount} is positive
     */
    @Override
    public void calculatePartialSums(VectorFloat<?> codebook, int codebookIndex, int size, int clusterCount, VectorFloat<?> query, int queryOffset, VectorSimilarityFunction vsf, VectorFloat<?> partialSums, VectorFloat<?> partialBest) {
        float best;
        if (vsf == VectorSimilarityFunction.EUCLIDEAN) {
            // Distances are minimized, so start from the largest finite float.
            best = Float.MAX_VALUE;
        } else {
            best = -Float.MAX_VALUE;
        }
        final int base = codebookIndex * clusterCount;
        for (int cluster = 0; cluster < clusterCount; cluster++) {
            final int entryOffset = cluster * size;
            final float score;
            final boolean smallerIsBetter;
            switch (vsf) {
                case DOT_PRODUCT:
                    score = this.dotProduct(codebook, entryOffset, query, queryOffset, size);
                    smallerIsBetter = false;
                    break;
                case EUCLIDEAN:
                    score = this.squareDistance(codebook, entryOffset, query, queryOffset, size);
                    smallerIsBetter = true;
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported similarity function " + vsf);
            }
            partialSums.set(base + cluster, score);
            best = smallerIsBetter ? Math.min(best, score) : Math.max(best, score);
        }
        partialBest.set(codebookIndex, best);
    }

    /**
     * Convenience overload covering the whole of {@code encoded}: delegates to the
     * seven-argument {@code pqDecodedCosineSimilarity}, passing {@code 0} and
     * {@code encoded.length()} as its second and third arguments (presumably the start offset
     * and the number of encoded entries to process; confirm against that overload).
     *
     * @param encoded      the encoded sequence
     * @param clusterCount forwarded unchanged
     * @param partialSums  forwarded unchanged
     * @param aMagnitude   forwarded unchanged
     * @param bMagnitude   forwarded unchanged
     * @return whatever the seven-argument overload returns for the full sequence
     */
    @Override
    public float pqDecodedCosineSimilarity(ByteSequence<?> encoded, int clusterCount, VectorFloat<?> partialSums, VectorFloat<?> aMagnitude, float bMagnitude) {
        final int encodedLength = encoded.length();
        return this.pqDecodedCosineSimilarity(encoded, 0, encodedLength, clusterCount, partialSums, aMagnitude, bMagnitude);
    }
}

