/*
 * Decompiled with CFR 0.152.
 */
package org.bouncycastle.pqc.crypto.mayo;

import java.security.SecureRandom;
import org.bouncycastle.crypto.CipherParameters;
import org.bouncycastle.crypto.CryptoServicesRegistrar;
import org.bouncycastle.crypto.digests.SHAKEDigest;
import org.bouncycastle.crypto.params.ParametersWithRandom;
import org.bouncycastle.pqc.crypto.MessageSigner;
import org.bouncycastle.pqc.crypto.mayo.GF16Utils;
import org.bouncycastle.pqc.crypto.mayo.MayoParameters;
import org.bouncycastle.pqc.crypto.mayo.MayoPrivateKeyParameters;
import org.bouncycastle.pqc.crypto.mayo.MayoPublicKeyParameters;
import org.bouncycastle.pqc.crypto.mayo.Utils;
import org.bouncycastle.util.Arrays;
import org.bouncycastle.util.Bytes;
import org.bouncycastle.util.GF16;
import org.bouncycastle.util.Longs;
import org.bouncycastle.util.Pack;

public class MayoSigner
implements MessageSigner {
    private SecureRandom random;
    private MayoParameters params;
    private MayoPublicKeyParameters pubKey;
    private MayoPrivateKeyParameters privKey;
    private static final int F_TAIL_LEN = 4;
    private static final long EVEN_BYTES = 0xFF00FF00FF00FFL;
    private static final long EVEN_2BYTES = 0xFFFF0000FFFFL;

    public void init(boolean forSigning, CipherParameters param) {
        if (forSigning) {
            this.pubKey = null;
            if (param instanceof ParametersWithRandom) {
                ParametersWithRandom withRandom = (ParametersWithRandom)param;
                this.privKey = (MayoPrivateKeyParameters)withRandom.getParameters();
                this.random = withRandom.getRandom();
            } else {
                this.privKey = (MayoPrivateKeyParameters)param;
                this.random = CryptoServicesRegistrar.getSecureRandom();
            }
            this.params = this.privKey.getParameters();
        } else {
            this.pubKey = (MayoPublicKeyParameters)param;
            this.params = this.pubKey.getParameters();
            this.privKey = null;
            this.random = null;
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public byte[] generateSignature(byte[] message) {
        byte[] byArray;
        int k = this.params.getK();
        int v = this.params.getV();
        int o = this.params.getO();
        int n = this.params.getN();
        int m = this.params.getM();
        int vbytes = this.params.getVBytes();
        int oBytes = this.params.getOBytes();
        int saltBytes = this.params.getSaltBytes();
        int mVecLimbs = this.params.getMVecLimbs();
        int p1Limbs = this.params.getP1Limbs();
        int pk_seed_bytes = this.params.getPkSeedBytes();
        int digestBytes = this.params.getDigestBytes();
        int skSeedBytes = this.params.getSkSeedBytes();
        byte[] tenc = new byte[this.params.getMBytes()];
        byte[] t = new byte[m];
        byte[] y = new byte[m];
        byte[] salt = new byte[saltBytes];
        byte[] V = new byte[k * vbytes + this.params.getRBytes()];
        byte[] Vdec = new byte[v * k];
        int ok = k * o;
        int nk = k * n;
        byte[] A = new byte[(m + 7) / 8 * 8 * (ok + 1)];
        byte[] x = new byte[nk];
        byte[] r = new byte[ok + 1];
        byte[] s = new byte[nk];
        byte[] tmp = new byte[digestBytes + saltBytes + skSeedBytes + 1];
        byte[] sig = new byte[this.params.getSigBytes()];
        long[] P = new long[p1Limbs + this.params.getP2Limbs()];
        byte[] O = new byte[v * o];
        long[] Mtmp = new long[ok * mVecLimbs];
        long[] vPv = new long[k * k * mVecLimbs];
        SHAKEDigest shake = new SHAKEDigest(256);
        try {
            byte[] seed_sk = this.privKey.getSeedSk();
            int totalS = pk_seed_bytes + oBytes;
            byte[] seed_pk = new byte[totalS];
            shake.update(seed_sk, 0, seed_sk.length);
            shake.doFinal(seed_pk, 0, totalS);
            GF16.decode(seed_pk, pk_seed_bytes, O, 0, O.length);
            Utils.expandP1P2(this.params, P, seed_pk);
            int bsMatEntriesUsed = 0;
            int omVecLimbs = o * mVecLimbs;
            int i = 0;
            int io = 0;
            int iomVecLimbs = 0;
            while (i < v) {
                int c = i;
                int co = io;
                int comVecLimbs = iomVecLimbs;
                while (c < v) {
                    if (c == i) {
                        bsMatEntriesUsed += mVecLimbs;
                    } else {
                        int j = 0;
                        int jmVecLimbs = p1Limbs;
                        while (j < o) {
                            GF16Utils.mVecMulAdd(mVecLimbs, P, bsMatEntriesUsed, O[co + j], P, iomVecLimbs + jmVecLimbs);
                            GF16Utils.mVecMulAdd(mVecLimbs, P, bsMatEntriesUsed, O[io + j], P, comVecLimbs + jmVecLimbs);
                            ++j;
                            jmVecLimbs += mVecLimbs;
                        }
                        bsMatEntriesUsed += mVecLimbs;
                    }
                    ++c;
                    co += o;
                    comVecLimbs += omVecLimbs;
                }
                ++i;
                io += o;
                iomVecLimbs += omVecLimbs;
            }
            Arrays.fill(seed_pk, (byte)0);
            shake.update(message, 0, message.length);
            shake.doFinal(tmp, 0, digestBytes);
            this.random.nextBytes(salt);
            System.arraycopy(salt, 0, tmp, digestBytes, salt.length);
            System.arraycopy(seed_sk, 0, tmp, digestBytes + saltBytes, skSeedBytes);
            shake.update(tmp, 0, digestBytes + saltBytes + skSeedBytes);
            shake.doFinal(salt, 0, saltBytes);
            System.arraycopy(salt, 0, tmp, digestBytes, saltBytes);
            shake.update(tmp, 0, digestBytes + saltBytes);
            shake.doFinal(tenc, 0, this.params.getMBytes());
            GF16.decode(tenc, t, m);
            int size = v * k * mVecLimbs;
            long[] Pv = new long[size];
            byte[] Ox = new byte[v];
            for (int ctr = 0; ctr <= 255; ++ctr) {
                tmp[tmp.length - 1] = (byte)ctr;
                shake.update(tmp, 0, tmp.length);
                shake.doFinal(V, 0, V.length);
                for (int i2 = 0; i2 < k; ++i2) {
                    GF16.decode(V, i2 * vbytes, Vdec, i2 * v, v);
                }
                GF16Utils.mulAddMatXMMat(mVecLimbs, Vdec, P, p1Limbs, Mtmp, k, v, o);
                GF16Utils.mulAddMUpperTriangularMatXMatTrans(mVecLimbs, P, Vdec, Pv, v, k);
                GF16Utils.mulAddMatXMMat(mVecLimbs, Vdec, Pv, vPv, k, v);
                this.computeRHS(vPv, t, y);
                this.computeA(Mtmp, A);
                GF16.decode(V, k * vbytes, r, 0, ok);
                if (this.sampleSolution(A, y, r, x)) break;
                Arrays.fill(Mtmp, 0L);
                Arrays.fill(vPv, 0L);
            }
            int i3 = 0;
            int io2 = 0;
            int in = 0;
            int iv = 0;
            while (i3 < k) {
                GF16Utils.matMul(O, x, io2, Ox, o, v);
                Bytes.xor(v, Vdec, iv, Ox, s, in);
                System.arraycopy(x, io2, s, in + v, o);
                ++i3;
                io2 += o;
                in += n;
                iv += v;
            }
            GF16.encode(s, sig, nk);
            System.arraycopy(salt, 0, sig, sig.length - saltBytes, saltBytes);
            byArray = Arrays.concatenate(sig, message);
            Object var48_50 = null;
        }
        catch (Throwable throwable) {
            Object var48_51 = null;
            Arrays.fill(tenc, (byte)0);
            Arrays.fill(t, (byte)0);
            Arrays.fill(y, (byte)0);
            Arrays.fill(salt, (byte)0);
            Arrays.fill(V, (byte)0);
            Arrays.fill(Vdec, (byte)0);
            Arrays.fill(A, (byte)0);
            Arrays.fill(x, (byte)0);
            Arrays.fill(r, (byte)0);
            Arrays.fill(s, (byte)0);
            Arrays.fill(tmp, (byte)0);
            throw throwable;
        }
        Arrays.fill(tenc, (byte)0);
        Arrays.fill(t, (byte)0);
        Arrays.fill(y, (byte)0);
        Arrays.fill(salt, (byte)0);
        Arrays.fill(V, (byte)0);
        Arrays.fill(Vdec, (byte)0);
        Arrays.fill(A, (byte)0);
        Arrays.fill(x, (byte)0);
        Arrays.fill(r, (byte)0);
        Arrays.fill(s, (byte)0);
        Arrays.fill(tmp, (byte)0);
        return byArray;
    }

    public boolean verifySignature(byte[] message, byte[] signature) {
        int m = this.params.getM();
        int n = this.params.getN();
        int k = this.params.getK();
        int kn = k * n;
        int p1Limbs = this.params.getP1Limbs();
        int p2Limbs = this.params.getP2Limbs();
        int p3Limbs = this.params.getP3Limbs();
        int mBytes = this.params.getMBytes();
        int sigBytes = this.params.getSigBytes();
        int digestBytes = this.params.getDigestBytes();
        int saltBytes = this.params.getSaltBytes();
        int mVecLimbs = this.params.getMVecLimbs();
        byte[] tEnc = new byte[mBytes];
        byte[] t = new byte[m];
        byte[] y = new byte[m << 1];
        byte[] s = new byte[kn];
        long[] pk = new long[p1Limbs + p2Limbs + p3Limbs];
        byte[] tmp = new byte[digestBytes + saltBytes];
        byte[] cpk = this.pubKey.getEncoded();
        Utils.expandP1P2(this.params, pk, cpk);
        Utils.unpackMVecs(cpk, this.params.getPkSeedBytes(), pk, p1Limbs + p2Limbs, p3Limbs / mVecLimbs, m);
        SHAKEDigest shake = new SHAKEDigest(256);
        shake.update(message, 0, message.length);
        shake.doFinal(tmp, 0, digestBytes);
        shake.update(tmp, 0, digestBytes);
        shake.update(signature, sigBytes - saltBytes, saltBytes);
        shake.doFinal(tEnc, 0, mBytes);
        GF16.decode(tEnc, t, m);
        GF16.decode(signature, s, kn);
        long[] SPS = new long[k * k * mVecLimbs];
        long[] PS = new long[kn * mVecLimbs];
        MayoSigner.mayoGenericMCalculatePS(this.params, pk, p1Limbs, p1Limbs + p2Limbs, s, this.params.getV(), this.params.getO(), k, PS);
        MayoSigner.mayoGenericMCalculateSPS(PS, s, mVecLimbs, k, n, SPS);
        byte[] zero = new byte[m];
        this.computeRHS(SPS, zero, y);
        return Arrays.constantTimeAreEqual(m, y, 0, t, 0);
    }

    void computeRHS(long[] vPv, byte[] t, byte[] y) {
        int i;
        int m = this.params.getM();
        int mVecLimbs = this.params.getMVecLimbs();
        int k = this.params.getK();
        int[] fTail = this.params.getFTail();
        int topPos = (m - 1 & 0xF) << 2;
        if ((m & 0xF) != 0) {
            long mask = (1L << ((m & 0xF) << 2)) - 1L;
            int kSquared = k * k;
            i = 0;
            int index = mVecLimbs - 1;
            while (i < kSquared) {
                int n = index;
                vPv[n] = vPv[n] & mask;
                ++i;
                index += mVecLimbs;
            }
        }
        long[] temp = new long[mVecLimbs];
        byte[] tempBytes = new byte[mVecLimbs << 3];
        int kmVecLimbs = k * mVecLimbs;
        i = k - 1;
        int imVecLimbs = i * mVecLimbs;
        int ikmVecLimbs = imVecLimbs * k;
        while (i >= 0) {
            int j = i;
            int jmVecLimbs = imVecLimbs;
            int jkmVecLimbs = ikmVecLimbs;
            while (j < k) {
                int top = (int)(temp[mVecLimbs - 1] >>> topPos & 0xFL);
                int n = mVecLimbs - 1;
                temp[n] = temp[n] << 4;
                int limb = mVecLimbs - 2;
                while (limb >= 0) {
                    int n2 = limb + 1;
                    temp[n2] = temp[n2] ^ temp[limb] >>> 60;
                    int n3 = limb--;
                    temp[n3] = temp[n3] << 4;
                }
                Pack.longToLittleEndian(temp, tempBytes, 0);
                for (int jj = 0; jj < 4; ++jj) {
                    int ft = fTail[jj];
                    if (ft == 0) continue;
                    long product = GF16.mul(top, ft);
                    if ((jj & 1) == 0) {
                        int n4 = jj >> 1;
                        tempBytes[n4] = (byte)(tempBytes[n4] ^ (byte)(product & 0xFL));
                        continue;
                    }
                    int n5 = jj >> 1;
                    tempBytes[n5] = (byte)(tempBytes[n5] ^ (byte)((product & 0xFL) << 4));
                }
                Pack.littleEndianToLong(tempBytes, 0, temp);
                int matrixIndex = ikmVecLimbs + jmVecLimbs;
                int symmetricIndex = jkmVecLimbs + imVecLimbs;
                boolean isDiagonal = i == j;
                int limb2 = 0;
                while (limb2 < mVecLimbs) {
                    long value = vPv[matrixIndex + limb2];
                    if (!isDiagonal) {
                        value ^= vPv[symmetricIndex + limb2];
                    }
                    int n6 = limb2++;
                    temp[n6] = temp[n6] ^ value;
                }
                ++j;
                jmVecLimbs += mVecLimbs;
                jkmVecLimbs += kmVecLimbs;
            }
            --i;
            imVecLimbs -= mVecLimbs;
            ikmVecLimbs -= kmVecLimbs;
        }
        Pack.longToLittleEndian(temp, tempBytes, 0);
        for (i = 0; i < m; i += 2) {
            int bytePos = i >> 1;
            y[i] = (byte)(t[i] ^ tempBytes[bytePos] & 0xF);
            y[i + 1] = (byte)(t[i + 1] ^ tempBytes[bytePos] >>> 4 & 0xF);
        }
    }

    void computeA(long[] Mtmp, byte[] AOut) {
        int r;
        int k = this.params.getK();
        int o = this.params.getO();
        int m = this.params.getM();
        int mVecLimbs = this.params.getMVecLimbs();
        int ACols = this.params.getACols();
        int[] fTailArr = this.params.getFTail();
        int bitsToShift = 0;
        int wordsToShift = 0;
        int MAYO_M_OVER_8 = m + 7 >>> 3;
        int ok = o * k;
        int omVecLimbs = o * mVecLimbs;
        int AWidth = ok + 15 >> 4 << 4;
        long[] A = new long[AWidth * MAYO_M_OVER_8 << 4];
        if ((m & 0xF) != 0) {
            long mask = 1L << ((m & 0xF) << 2);
            --mask;
            int i = 0;
            int idx = mVecLimbs - 1;
            while (i < ok) {
                int n = idx;
                Mtmp[n] = Mtmp[n] & mask;
                ++i;
                idx += mVecLimbs;
            }
        }
        int i = 0;
        int io = 0;
        int iomVecLimbs = 0;
        while (i < k) {
            int j = k - 1;
            int jomVecLimbs = j * omVecLimbs;
            int jo = j * o;
            while (j >= i) {
                int aIndex;
                long value;
                int limbAWidhth;
                int limb;
                int c = 0;
                int cmVecLimbs = 0;
                while (c < o) {
                    limb = 0;
                    limbAWidhth = 0;
                    while (limb < mVecLimbs) {
                        value = Mtmp[jomVecLimbs + limb + cmVecLimbs];
                        int n = aIndex = io + c + wordsToShift + limbAWidhth;
                        A[n] = A[n] ^ value << bitsToShift;
                        if (bitsToShift > 0) {
                            int n2 = aIndex + AWidth;
                            A[n2] = A[n2] ^ value >>> 64 - bitsToShift;
                        }
                        ++limb;
                        limbAWidhth += AWidth;
                    }
                    ++c;
                    cmVecLimbs += mVecLimbs;
                }
                if (i != j) {
                    c = 0;
                    cmVecLimbs = 0;
                    while (c < o) {
                        limb = 0;
                        limbAWidhth = 0;
                        while (limb < mVecLimbs) {
                            value = Mtmp[iomVecLimbs + limb + cmVecLimbs];
                            int n = aIndex = jo + c + wordsToShift + limbAWidhth;
                            A[n] = A[n] ^ value << bitsToShift;
                            if (bitsToShift > 0) {
                                int n3 = aIndex + AWidth;
                                A[n3] = A[n3] ^ value >>> 64 - bitsToShift;
                            }
                            ++limb;
                            limbAWidhth += AWidth;
                        }
                        ++c;
                        cmVecLimbs += mVecLimbs;
                    }
                }
                if ((bitsToShift += 4) == 64) {
                    wordsToShift += AWidth;
                    bitsToShift = 0;
                }
                --j;
                jomVecLimbs -= omVecLimbs;
                jo -= o;
            }
            ++i;
            io += o;
            iomVecLimbs += omVecLimbs;
        }
        for (int c = 0; c < AWidth * (m + ((k + 1) * k >> 1) + 15 >>> 4); c += 16) {
            MayoSigner.transpose16x16Nibbles(A, c);
        }
        byte[] tab = new byte[16];
        int idx = 0;
        for (int i2 = 0; i2 < 4; ++i2) {
            int ft = fTailArr[i2];
            tab[idx++] = (byte)GF16.mul(ft, 1);
            tab[idx++] = (byte)GF16.mul(ft, 2);
            tab[idx++] = (byte)GF16.mul(ft, 4);
            tab[idx++] = (byte)GF16.mul(ft, 8);
        }
        for (int c = 0; c < AWidth; c += 16) {
            for (r = m; r < m + ((k + 1) * k >>> 1); ++r) {
                int pos = (r >>> 4) * AWidth + c + (r & 0xF);
                long t0 = A[pos] & 0x1111111111111111L;
                long t1 = A[pos] >>> 1 & 0x1111111111111111L;
                long t2 = A[pos] >>> 2 & 0x1111111111111111L;
                long t3 = A[pos] >>> 3 & 0x1111111111111111L;
                int t = 0;
                int t4 = 0;
                while (t < 4) {
                    int targetPos;
                    int targetRow = r + t - m;
                    int n = targetPos = (targetRow >> 4) * AWidth + c + (targetRow & 0xF);
                    A[n] = A[n] ^ (t0 * (long)tab[t4] ^ t1 * (long)tab[t4 + 1] ^ t2 * (long)tab[t4 + 2] ^ t3 * (long)tab[t4 + 3]);
                    ++t;
                    t4 += 4;
                }
            }
        }
        byte[] Abytes = Pack.longToLittleEndian(A);
        for (r = 0; r < m; r += 16) {
            for (int c = 0; c < ACols - 1; c += 16) {
                int i3 = 0;
                while (i3 + r < m) {
                    GF16.decode(Abytes, (r * AWidth >> 4) + c + i3 << 3, AOut, (r + i3) * ACols + c, Math.min(16, ACols - 1 - c));
                    ++i3;
                }
            }
        }
    }

    private static void transpose16x16Nibbles(long[] M, int offset) {
        long t1;
        int i;
        for (i = 0; i < 16; i += 2) {
            int idx1 = offset + i;
            int idx2 = idx1 + 1;
            long t = (M[idx1] >>> 4 ^ M[idx2]) & 0xF0F0F0F0F0F0F0FL;
            int n = idx1;
            M[n] = M[n] ^ t << 4;
            int n2 = idx2;
            M[n2] = M[n2] ^ t;
        }
        int base = offset;
        for (i = 0; i < 16; i += 4) {
            long t0 = (M[base] >>> 8 ^ M[base + 2]) & 0xFF00FF00FF00FFL;
            t1 = (M[base + 1] >>> 8 ^ M[base + 3]) & 0xFF00FF00FF00FFL;
            int n = base++;
            M[n] = M[n] ^ t0 << 8;
            int n3 = base++;
            M[n3] = M[n3] ^ t1 << 8;
            int n4 = base++;
            M[n4] = M[n4] ^ t0;
            int n5 = base++;
            M[n5] = M[n5] ^ t1;
        }
        for (i = 0; i < 4; ++i) {
            base = offset + i;
            long t0 = (M[base] >>> 16 ^ M[base + 4]) & 0xFFFF0000FFFFL;
            t1 = (M[base + 8] >>> 16 ^ M[base + 12]) & 0xFFFF0000FFFFL;
            int n = base;
            M[n] = M[n] ^ t0 << 16;
            int n6 = base + 8;
            M[n6] = M[n6] ^ t1 << 16;
            int n7 = base + 4;
            M[n7] = M[n7] ^ t0;
            int n8 = base + 12;
            M[n8] = M[n8] ^ t1;
        }
        for (i = 0; i < 8; ++i) {
            base = offset + i;
            long t = (M[base] >>> 32 ^ M[base + 8]) & 0xFFFFFFFFL;
            int n = base;
            M[n] = M[n] ^ t << 32;
            int n9 = base + 8;
            M[n9] = M[n9] ^ t;
        }
    }

    boolean sampleSolution(byte[] A, byte[] y, byte[] r, byte[] x) {
        int k = this.params.getK();
        int o = this.params.getO();
        int m = this.params.getM();
        int aCols = this.params.getACols();
        int ok = k * o;
        System.arraycopy(r, 0, x, 0, ok);
        byte[] Ar = new byte[m];
        GF16Utils.matMul(A, r, 0, Ar, ok + 1, m);
        int i = 0;
        int idx = ok;
        while (i < m) {
            A[idx] = (byte)(y[i] ^ Ar[i]);
            ++i;
            idx += ok + 1;
        }
        this.ef(A, m, aCols);
        boolean fullRank = false;
        int i2 = 0;
        int idx2 = (m - 1) * aCols;
        while (i2 < aCols - 1) {
            fullRank |= A[idx2] != 0;
            ++i2;
            ++idx2;
        }
        if (!fullRank) {
            return false;
        }
        int row = m - 1;
        int rowAcols = row * aCols;
        while (row >= 0) {
            int finished = 0;
            int colUpperBound = Math.min(row + 32 / (m - row), ok);
            for (int col = row; col <= colUpperBound; ++col) {
                byte correctCol = (byte)(-(A[rowAcols + col] & 0xFF) >> 31);
                byte u = (byte)(correctCol & ~finished & A[rowAcols + aCols - 1]);
                int n = col;
                x[n] = (byte)(x[n] ^ u);
                int i3 = 0;
                int iaCols_col = col;
                int iaCols_aCols1 = aCols - 1;
                while (i3 < row) {
                    long tmp = 0L;
                    int j = 0;
                    int jaCols = 0;
                    while (j < 8) {
                        tmp ^= (long)(A[iaCols_col + jaCols] & 0xFF) << (j << 3);
                        ++j;
                        jaCols += aCols;
                    }
                    tmp = GF16Utils.mulFx8(u, tmp);
                    j = 0;
                    jaCols = 0;
                    while (j < 8) {
                        int n2 = iaCols_aCols1 + jaCols;
                        A[n2] = (byte)(A[n2] ^ (byte)(tmp >> (j << 3) & 0xFL));
                        ++j;
                        jaCols += aCols;
                    }
                    i3 += 8;
                    iaCols_col += aCols << 3;
                    iaCols_aCols1 += aCols << 3;
                }
                finished = (byte)(finished | correctCol);
            }
            --row;
            rowAcols -= aCols;
        }
        return true;
    }

    void ef(byte[] A, int nrows, int ncols) {
        int rowLen = ncols + 15 >> 4;
        long[] pivotRow = new long[rowLen];
        long[] pivotRow2 = new long[rowLen];
        long[] packedA = new long[nrows * rowLen];
        int len = this.params.getO() * this.params.getK() + 16;
        byte[] bytes = new byte[len >> 1];
        int len_4 = len >> 4;
        int i = 0;
        int incols = 0;
        int irowLen = 0;
        while (i < nrows) {
            for (int word = 0; word < rowLen; ++word) {
                long wordVal = 0L;
                for (int nibble = 0; nibble < 16; ++nibble) {
                    int col = (word << 4) + nibble;
                    if (col >= ncols) continue;
                    wordVal |= ((long)A[incols + col] & 0xFL) << (nibble << 2);
                }
                packedA[word + irowLen] = wordVal;
            }
            ++i;
            incols += ncols;
            irowLen += rowLen;
        }
        int pivotRowIndex = 0;
        for (int pivotCol = 0; pivotCol < ncols; ++pivotCol) {
            int lowerBound = Math.max(0, pivotCol + nrows - ncols);
            int upperBound = Math.min(nrows - 1, pivotCol);
            Arrays.clear(pivotRow);
            Arrays.clear(pivotRow2);
            int pivot = 0;
            long pivotIsZero = -1L;
            int searchUpper = Math.min(nrows - 1, upperBound + 32);
            int row = lowerBound;
            int rowRowLen = lowerBound * rowLen;
            while (row <= searchUpper) {
                long isPivotRow = MayoSigner.ctCompare64(row, pivotRowIndex) ^ 0xFFFFFFFFFFFFFFFFL;
                long belowPivotRow = (long)pivotRowIndex - (long)row >> 63;
                for (int j = 0; j < rowLen; ++j) {
                    int n = j;
                    pivotRow[n] = pivotRow[n] ^ (isPivotRow | belowPivotRow & pivotIsZero) & packedA[rowRowLen + j];
                }
                pivot = (int)(pivotRow[pivotCol >>> 4] >>> ((pivotCol & 0xF) << 2) & 0xFL);
                pivotIsZero = -((long)pivot) >> 63 ^ 0xFFFFFFFFFFFFFFFFL;
                ++row;
                rowRowLen += rowLen;
            }
            MayoSigner.vecMulAddU64(rowLen, pivotRow, GF16.inv((byte)pivot), pivotRow2);
            row = lowerBound;
            rowRowLen = lowerBound * rowLen;
            while (row <= upperBound) {
                long doCopy = (MayoSigner.ctCompare64(row, pivotRowIndex) ^ 0xFFFFFFFFFFFFFFFFL) & (pivotIsZero ^ 0xFFFFFFFFFFFFFFFFL);
                long doNotCopy = doCopy ^ 0xFFFFFFFFFFFFFFFFL;
                int col = 0;
                int rowRowLen_col = rowRowLen;
                while (col < rowLen) {
                    packedA[rowRowLen_col] = doNotCopy & packedA[rowRowLen_col] | doCopy & pivotRow2[col];
                    ++col;
                    ++rowRowLen_col;
                }
                ++row;
                rowRowLen += rowLen;
            }
            row = lowerBound;
            rowRowLen = lowerBound * rowLen;
            while (row < nrows) {
                int belowPivot = row > pivotRowIndex ? -1 : 0;
                int eltToElim = (int)(packedA[rowRowLen + (pivotCol >>> 4)] >>> ((pivotCol & 0xF) << 2) & 0xFL);
                MayoSigner.vecMulAddU64(rowLen, pivotRow2, (byte)(belowPivot & eltToElim), packedA, rowRowLen);
                ++row;
                rowRowLen += rowLen;
            }
            if (pivot == 0) continue;
            ++pivotRowIndex;
        }
        int outIndex = 0;
        int i2 = 0;
        int irowLen2 = 0;
        while (i2 < nrows) {
            Pack.longToLittleEndian(packedA, irowLen2, len_4, bytes, 0);
            GF16.decode(bytes, 0, A, outIndex, ncols);
            outIndex += ncols;
            ++i2;
            irowLen2 += rowLen;
        }
    }

    private static long ctCompare64(int a, int b) {
        return -((long)(a ^ b)) >> 63;
    }

    private static void vecMulAddU64(int legs, long[] in, byte a, long[] acc) {
        int tab = MayoSigner.mulTable(a & 0xFF);
        int i = 0;
        while (i < legs) {
            long val = (in[i] & 0x1111111111111111L) * (long)(tab & 0xFF) ^ (in[i] >>> 1 & 0x1111111111111111L) * (long)(tab >>> 8 & 0xF) ^ (in[i] >>> 2 & 0x1111111111111111L) * (long)(tab >>> 16 & 0xF) ^ (in[i] >>> 3 & 0x1111111111111111L) * (long)(tab >>> 24 & 0xF);
            int n = i++;
            acc[n] = acc[n] ^ val;
        }
    }

    private static void vecMulAddU64(int legs, long[] in, byte a, long[] acc, int accOffset) {
        int tab = MayoSigner.mulTable(a & 0xFF);
        for (int i = 0; i < legs; ++i) {
            long val = (in[i] & 0x1111111111111111L) * (long)(tab & 0xFF) ^ (in[i] >>> 1 & 0x1111111111111111L) * (long)(tab >>> 8 & 0xF) ^ (in[i] >>> 2 & 0x1111111111111111L) * (long)(tab >>> 16 & 0xF) ^ (in[i] >>> 3 & 0x1111111111111111L) * (long)(tab >>> 24 & 0xF);
            int n = accOffset + i;
            acc[n] = acc[n] ^ val;
        }
    }

    private static int mulTable(int b) {
        int x = b * 134480385;
        int highHalf = x & 0xF0F0F0F0;
        return x ^ highHalf >>> 4 ^ highHalf >>> 3;
    }

    private static void mayoGenericMCalculatePS(MayoParameters p, long[] P1, int p2, int p3, byte[] S, int v, int o, int k, long[] PS) {
        int n = o + v;
        int mVecLimbs = p.getMVecLimbs();
        long[] accumulator = new long[mVecLimbs * p.getK() * p.getN() * mVecLimbs << 4];
        int o_mVecLimbs = o * mVecLimbs;
        int pUsed = 0;
        int row = 0;
        int krow = 0;
        int orow_mVecLimbs = 0;
        while (row < v) {
            int j;
            for (j = row; j < v; ++j) {
                int col = 0;
                int ncol = 0;
                while (col < k) {
                    Longs.xorTo(mVecLimbs, P1, pUsed, accumulator, ((krow + col << 4) + (S[ncol + j] & 0xFF)) * mVecLimbs);
                    ++col;
                    ncol += n;
                }
                pUsed += mVecLimbs;
            }
            j = 0;
            int orow_j_mVecLimbs = orow_mVecLimbs;
            while (j < o) {
                int col = 0;
                int ncol = 0;
                while (col < k) {
                    Longs.xorTo(mVecLimbs, P1, p2 + orow_j_mVecLimbs, accumulator, ((krow + col << 4) + (S[ncol + j + v] & 0xFF)) * mVecLimbs);
                    ++col;
                    ncol += n;
                }
                ++j;
                orow_j_mVecLimbs += mVecLimbs;
            }
            ++row;
            krow += k;
            orow_mVecLimbs += o_mVecLimbs;
        }
        pUsed = 0;
        row = v;
        krow = v * k;
        while (row < n) {
            for (int j = row; j < n; ++j) {
                int col = 0;
                int ncol = 0;
                while (col < k) {
                    Longs.xorTo(mVecLimbs, P1, p3 + pUsed, accumulator, ((krow + col << 4) + (S[ncol + j] & 0xFF)) * mVecLimbs);
                    ++col;
                    ncol += n;
                }
                pUsed += mVecLimbs;
            }
            ++row;
            krow += k;
        }
        MayoSigner.mVecMultiplyBins(mVecLimbs, n * k, accumulator, PS);
    }

    private static void mayoGenericMCalculateSPS(long[] PS, byte[] S, int mVecLimbs, int k, int n, long[] SPS) {
        int kk = k * k;
        int accumulatorSize = mVecLimbs * kk << 4;
        long[] accumulator = new long[accumulatorSize];
        int kmVecLimbs = k * mVecLimbs;
        int row = 0;
        int nrow = 0;
        int krowmVecLimbs16 = 0;
        while (row < k) {
            int j = 0;
            int jkmVecLimbs = 0;
            while (j < n) {
                int sValmVecLimbs = (S[nrow + j] & 0xFF) * mVecLimbs + krowmVecLimbs16;
                int col = 0;
                int colmVecLimbs = 0;
                while (col < k) {
                    Longs.xorTo(mVecLimbs, PS, jkmVecLimbs + colmVecLimbs, accumulator, sValmVecLimbs + (colmVecLimbs << 4));
                    ++col;
                    colmVecLimbs += mVecLimbs;
                }
                ++j;
                jkmVecLimbs += kmVecLimbs;
            }
            ++row;
            nrow += n;
            krowmVecLimbs16 += kmVecLimbs << 4;
        }
        MayoSigner.mVecMultiplyBins(mVecLimbs, kk, accumulator, SPS);
    }

    private static void mVecMultiplyBins(int mVecLimbs, int len, long[] bins, long[] ps) {
        int mVecLimbs2 = mVecLimbs + mVecLimbs;
        int mVecLimbs3 = mVecLimbs2 + mVecLimbs;
        int mVecLimbs4 = mVecLimbs3 + mVecLimbs;
        int mVecLimbs5 = mVecLimbs4 + mVecLimbs;
        int mVecLimbs6 = mVecLimbs5 + mVecLimbs;
        int mVecLimbs7 = mVecLimbs6 + mVecLimbs;
        int mVecLimbs8 = mVecLimbs7 + mVecLimbs;
        int mVecLimbs9 = mVecLimbs8 + mVecLimbs;
        int mVecLimbs10 = mVecLimbs9 + mVecLimbs;
        int mVecLimbs11 = mVecLimbs10 + mVecLimbs;
        int mVecLimbs12 = mVecLimbs11 + mVecLimbs;
        int mVecLimbs13 = mVecLimbs12 + mVecLimbs;
        int mVecLimbs14 = mVecLimbs13 + mVecLimbs;
        int mVecLimbs15 = mVecLimbs14 + mVecLimbs;
        int i = 0;
        int imVecLimbs4 = 0;
        while (i < len) {
            int j = 0;
            int off = imVecLimbs4;
            while (j < mVecLimbs) {
                long b = bins[off + mVecLimbs5];
                long t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs10] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                long a = bins[off + mVecLimbs11];
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs12] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs7] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs6] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs14] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs3] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs15] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs8] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs13] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs4] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs9] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                a = bins[off + mVecLimbs2] ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                t = b & 0x1111111111111111L;
                b = bins[off + mVecLimbs] ^ (b & 0xEEEEEEEEEEEEEEEEL) >>> 1 ^ (t << 3) + t;
                t = (a & 0x8888888888888888L) >>> 3;
                ps[(imVecLimbs4 >> 4) + j] = b ^ (a & 0x7777777777777777L) << 1 ^ (t << 1) + t;
                ++j;
                ++off;
            }
            ++i;
            imVecLimbs4 += mVecLimbs << 4;
        }
    }
}

