/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.simdvec.internal.vectorization;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import org.apache.lucene.util.Constants;
import org.elasticsearch.simdvec.internal.vectorization.DefaultESVectorUtilSupport;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;

/**
 * {@link ESVectorUtilSupport} implementation built on the JDK incubator Vector API
 * ({@code jdk.incubator.vector}).
 *
 * <p>The kernels in this class treat {@code q} as four consecutive segments, each
 * {@code d.length} bytes long. For segment {@code j} (0..3) they count the bits set in
 * {@code q[j * d.length + k] & d[k]} over all {@code k}, and return
 * {@code count0 + (count1 << 1) + (count2 << 2) + (count3 << 3)}.
 * NOTE(review): presumably the four segments are the bit planes of a 4-bit quantized
 * query vector and {@code d} is a 1-bit quantized document vector; confirm against callers.
 */
public final class PanamaESVectorUtilSupport implements ESVectorUtilSupport {
    /** Bit size of the vector shape the platform reports as preferred. */
    static final int VECTOR_BITSIZE = VectorShape.preferredShape().vectorBitSize();

    /**
     * {@code false} only when running on {@code amd64} with a preferred vector shape narrower
     * than 256 bits; {@code true} otherwise. When {@code false}, the vectorized kernels are
     * bypassed entirely.
     */
    static final boolean HAS_FAST_INTEGER_VECTORS;

    private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
    private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;

    static {
        // NOTE(review): the variable name suggests "x86-64 without AVX2", where integer vector
        // operations are presumably not worth using; the rationale is not visible here.
        boolean isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && VECTOR_BITSIZE < 256;
        HAS_FAST_INTEGER_VECTORS = !isAMD64withoutAVX2;
    }

    /**
     * Dispatches to the widest suitable vectorized kernel, or to the default implementation.
     *
     * <p>The vectorized path is taken only when {@code d} holds at least 16 bytes (one full
     * 128-bit vector) and {@link #HAS_FAST_INTEGER_VECTORS} is set. Any other preferred
     * vector size (neither {@code >= 256} nor exactly {@code 128}) falls back to
     * {@code DefaultESVectorUtilSupport.ipByteBinByteImpl}.
     *
     * @param q the four-segment operand; the kernels read it at offsets up to
     *          {@code 4 * d.length - 1}
     * @param d the single-segment operand
     * @return the weighted bit-count sum described on the class
     */
    @Override
    public long ipByteBinByte(byte[] q, byte[] d) {
        // 128 bits / 8 == 16 bytes: anything shorter cannot fill even one vector.
        if (d.length >= 16 && HAS_FAST_INTEGER_VECTORS) {
            if (VECTOR_BITSIZE >= 256) {
                return ipByteBin256(q, d);
            }
            if (VECTOR_BITSIZE == 128) {
                return ipByteBin128(q, d);
            }
        }
        return DefaultESVectorUtilSupport.ipByteBinByteImpl(q, d);
    }

    /**
     * Kernel for platforms whose preferred vector shape is at least 256 bits wide.
     *
     * <p>Runs in three phases that share one running index: a 256-bit loop (only entered when
     * {@code d} holds at least two 256-bit vectors' worth of bytes), a 128-bit loop over what
     * remains, and a scalar loop for the final bytes.
     *
     * <p>No length validation is done here; {@code q} is indexed at offsets up to
     * {@code 4 * d.length - 1}.
     *
     * @param q the four-segment operand
     * @param d the single-segment operand
     * @return {@code count0 + (count1 << 1) + (count2 << 2) + (count3 << 3)}
     */
    static long ipByteBin256(byte[] q, byte[] d) {
        long subRet0 = 0L;
        long subRet1 = 0L;
        long subRet2 = 0L;
        long subRet3 = 0L;
        // Must start at 0: every phase below is conditional, yet each one reads this index.
        int i = 0;

        if (d.length >= BYTE_SPECIES_256.vectorByteSize() * 2) {
            int limit = BYTE_SPECIES_256.loopBound(d.length);
            LongVector sum0 = LongVector.zero(LongVector.SPECIES_256);
            LongVector sum1 = LongVector.zero(LongVector.SPECIES_256);
            LongVector sum2 = LongVector.zero(LongVector.SPECIES_256);
            LongVector sum3 = LongVector.zero(LongVector.SPECIES_256);
            for (; i < limit; i += BYTE_SPECIES_256.length()) {
                // Bytes are reinterpreted as long lanes so BIT_COUNT works on 64 bits at a time.
                LongVector vq0 = ByteVector.fromArray(BYTE_SPECIES_256, q, i).reinterpretAsLongs();
                LongVector vq1 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length).reinterpretAsLongs();
                LongVector vq2 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 2).reinterpretAsLongs();
                LongVector vq3 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 3).reinterpretAsLongs();
                LongVector vd = ByteVector.fromArray(BYTE_SPECIES_256, d, i).reinterpretAsLongs();
                sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
            }
            subRet0 += sum0.reduceLanes(VectorOperators.ADD);
            subRet1 += sum1.reduceLanes(VectorOperators.ADD);
            subRet2 += sum2.reduceLanes(VectorOperators.ADD);
            subRet3 += sum3.reduceLanes(VectorOperators.ADD);
        }

        // Continue from wherever the 256-bit phase stopped (or from 0 if it was skipped).
        if (d.length - i >= BYTE_SPECIES_128.vectorByteSize()) {
            LongVector sum0 = LongVector.zero(LongVector.SPECIES_128);
            LongVector sum1 = LongVector.zero(LongVector.SPECIES_128);
            LongVector sum2 = LongVector.zero(LongVector.SPECIES_128);
            LongVector sum3 = LongVector.zero(LongVector.SPECIES_128);
            int limit = BYTE_SPECIES_128.loopBound(d.length);
            for (; i < limit; i += BYTE_SPECIES_128.length()) {
                LongVector vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsLongs();
                LongVector vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsLongs();
                LongVector vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsLongs();
                LongVector vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsLongs();
                LongVector vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsLongs();
                sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
                sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
            }
            subRet0 += sum0.reduceLanes(VectorOperators.ADD);
            subRet1 += sum1.reduceLanes(VectorOperators.ADD);
            subRet2 += sum2.reduceLanes(VectorOperators.ADD);
            subRet3 += sum3.reduceLanes(VectorOperators.ADD);
        }

        // Scalar tail: the mask with 0xFF keeps sign extension of the bytes out of the bit count.
        for (; i < d.length; i++) {
            subRet0 += Integer.bitCount(q[i] & d[i] & 0xFF);
            subRet1 += Integer.bitCount(q[i + d.length] & d[i] & 0xFF);
            subRet2 += Integer.bitCount(q[i + 2 * d.length] & d[i] & 0xFF);
            subRet3 += Integer.bitCount(q[i + 3 * d.length] & d[i] & 0xFF);
        }
        return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
    }

    /**
     * Kernel that uses only 128-bit vectors, followed by a scalar loop for the remaining bytes.
     *
     * <p>Bit counts are accumulated in {@code int} lanes (each lane grows by at most 32 per
     * iteration) and widened to {@code long} once the vector loop has finished.
     *
     * <p>No length validation is done here; {@code q} is indexed at offsets up to
     * {@code 4 * d.length - 1}.
     *
     * @param q the four-segment operand
     * @param d the single-segment operand
     * @return {@code count0 + (count1 << 1) + (count2 << 2) + (count3 << 3)}
     */
    public static long ipByteBin128(byte[] q, byte[] d) {
        long subRet0 = 0L;
        long subRet1 = 0L;
        long subRet2 = 0L;
        long subRet3 = 0L;
        int i = 0;

        IntVector sum0 = IntVector.zero(IntVector.SPECIES_128);
        IntVector sum1 = IntVector.zero(IntVector.SPECIES_128);
        IntVector sum2 = IntVector.zero(IntVector.SPECIES_128);
        IntVector sum3 = IntVector.zero(IntVector.SPECIES_128);
        int limit = BYTE_SPECIES_128.loopBound(d.length);
        for (; i < limit; i += BYTE_SPECIES_128.length()) {
            IntVector vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsInts();
            IntVector vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsInts();
            IntVector vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsInts();
            IntVector vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsInts();
            IntVector vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsInts();
            sum0 = sum0.add(vd.and(vq0).lanewise(VectorOperators.BIT_COUNT));
            sum1 = sum1.add(vd.and(vq1).lanewise(VectorOperators.BIT_COUNT));
            sum2 = sum2.add(vd.and(vq2).lanewise(VectorOperators.BIT_COUNT));
            sum3 = sum3.add(vd.and(vq3).lanewise(VectorOperators.BIT_COUNT));
        }
        subRet0 += sum0.reduceLanes(VectorOperators.ADD);
        subRet1 += sum1.reduceLanes(VectorOperators.ADD);
        subRet2 += sum2.reduceLanes(VectorOperators.ADD);
        subRet3 += sum3.reduceLanes(VectorOperators.ADD);

        // Scalar tail: the mask with 0xFF keeps sign extension of the bytes out of the bit count.
        for (; i < d.length; i++) {
            byte dValue = d[i];
            subRet0 += Integer.bitCount(dValue & q[i] & 0xFF);
            subRet1 += Integer.bitCount(dValue & q[i + d.length] & 0xFF);
            subRet2 += Integer.bitCount(dValue & q[i + 2 * d.length] & 0xFF);
            subRet3 += Integer.bitCount(dValue & q[i + 3 * d.length] & 0xFF);
        }
        return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
    }
}

