/*
 * Decompiled with CFR 0.152.
 */
package org.nd4j.linalg.api.parallel.tasks.cpu.misc;

import io.netty.buffer.ByteBuf;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.RecursiveTask;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.parallel.tasks.Task;
import org.nd4j.linalg.api.parallel.tasks.TaskExecutorProvider;
import org.nd4j.linalg.factory.Nd4j;

/**
 * CPU task that accumulates a 6-d "column" array into a 4-d image array (col2im).
 *
 * <p>The column array is indexed as {@code [example, depth, kernelY, kernelX, outY, outX]} and the
 * output array as {@code [example, depth, imgY, imgX]}. For every output position
 * {@code (outY, outX)} the {@code kernelHeight x kernelWidth} patch stored in the column array is
 * added onto the image starting at row {@code outY * strideY - padHeight} and column
 * {@code outX * strideX - padWidth}; patch cells that fall outside the image are dropped.
 *
 * <p>The task can run in two ways:
 * <ul>
 *   <li>as a {@link RecursiveTask}: {@link #compute()} splits the work, runs both halves through
 *       fork/join and returns the output array once they are done;</li>
 *   <li>as a {@link Task}: {@link #call()} hands the halves to the executor obtained from
 *       {@link TaskExecutorProvider} and returns immediately; {@link #blockUntilComplete()} waits
 *       for this task and, recursively, for every sub-task it spawned.</li>
 * </ul>
 *
 * <p>Work is split first over the example range and, once a single example is left, over the
 * depth range, so sub-tasks always cover disjoint {@code (example, depth)} slices.
 */
public class CPUCol2ImTask
extends RecursiveTask<INDArray>
implements Task<INDArray> {
    /** Handle returned by the task executor; set by {@link #invokeAsync()}. */
    protected Future<INDArray> future;
    /** Sub-tasks started through the task executor; only populated by {@link #call()}. */
    protected List<CPUCol2ImTask> subTasks;
    /** Input column array (6-d). */
    protected final INDArray col;
    /** Output image array (4-d); values are accumulated into it in place. */
    protected INDArray imgOut;
    /** Kernel height, taken from dimension 2 of {@link #col}. */
    protected final int kernelHeight;
    /** Kernel width, taken from dimension 3 of {@link #col}. */
    protected final int kernelWidth;
    protected final int strideY;
    protected final int strideX;
    protected final int padHeight;
    protected final int padWidth;
    protected final int imgHeight;
    protected final int imgWidth;
    /** Work size above which the task is split; {@code Integer.MAX_VALUE} disables splitting. */
    protected final int parallelThreshold;
    /** First example index handled by this task (inclusive). */
    protected final int exampleFrom;
    /** Last example index handled by this task (exclusive). */
    protected final int exampleTo;
    /** First depth index handled by this task (inclusive). */
    protected final int depthFrom;
    /** Last depth index handled by this task (exclusive). */
    protected final int depthTo;

    /**
     * Creates a task covering every example and depth slice of {@code col}, writing into a newly
     * created output array of shape {@code [col.size(0), col.size(1), imgHeight, imgWidth]}.
     *
     * @param col               column array to read from
     * @param strideY           vertical stride between neighbouring patches
     * @param strideX           horizontal stride between neighbouring patches
     * @param padHeight         vertical padding subtracted from the patch start row
     * @param padWidth          horizontal padding subtracted from the patch start column
     * @param imgHeight         height of the output image
     * @param imgWidth          width of the output image
     * @param parallelThreshold work size above which the task is split into sub-tasks
     */
    public CPUCol2ImTask(INDArray col, int strideY, int strideX, int padHeight, int padWidth, int imgHeight, int imgWidth, int parallelThreshold) {
        this(col, CPUCol2ImTask.getNewOutputArray(col, imgHeight, imgWidth), strideY, strideX, padHeight, padWidth, imgHeight, imgWidth, 0, col.size(0), 0, col.size(1), parallelThreshold);
    }

    /**
     * Creates a task restricted to the given example and depth ranges, accumulating into an
     * existing output array.
     *
     * @param col               column array to read from
     * @param imgOut            output array that patches are added onto
     * @param strideY           vertical stride between neighbouring patches
     * @param strideX           horizontal stride between neighbouring patches
     * @param padHeight         vertical padding subtracted from the patch start row
     * @param padWidth          horizontal padding subtracted from the patch start column
     * @param imgHeight         height of the output image
     * @param imgWidth          width of the output image
     * @param exampleFrom       first example index (inclusive)
     * @param exampleTo         last example index (exclusive)
     * @param depthFrom         first depth index (inclusive)
     * @param depthTo           last depth index (exclusive)
     * @param parallelThreshold work size above which the task is split into sub-tasks
     */
    public CPUCol2ImTask(INDArray col, INDArray imgOut, int strideY, int strideX, int padHeight, int padWidth, int imgHeight, int imgWidth, int exampleFrom, int exampleTo, int depthFrom, int depthTo, int parallelThreshold) {
        this.col = col;
        this.imgOut = imgOut;
        this.kernelHeight = col.size(2);
        this.kernelWidth = col.size(3);
        this.strideY = strideY;
        this.strideX = strideX;
        this.padHeight = padHeight;
        this.padWidth = padWidth;
        this.imgHeight = imgHeight;
        this.imgWidth = imgWidth;
        this.parallelThreshold = parallelThreshold;
        this.exampleFrom = exampleFrom;
        this.exampleTo = exampleTo;
        this.depthFrom = depthFrom;
        this.depthTo = depthTo;
    }

    /** Allocates the output array: {@code [col.size(0), col.size(1), imgHeight, imgWidth]}. */
    private static INDArray getNewOutputArray(INDArray col, int imgHeight, int imgWidth) {
        int n = col.size(0);
        int c = col.size(1);
        return Nd4j.create(n, c, imgHeight, imgWidth);
    }

    /**
     * Fork/join entry point: splits if necessary, waits for both halves and returns the output.
     *
     * @return the output image array
     */
    @Override
    protected INDArray compute() {
        this.splitOrExecute(true);
        return this.imgOut;
    }

    /**
     * Executor entry point. Sub-tasks are submitted through {@link #invokeAsync()} and recorded in
     * {@link #subTasks}; this method does not wait for them, so the result must be obtained via
     * {@link #blockUntilComplete()}.
     *
     * @return always {@code null}
     */
    @Override
    public INDArray call() {
        // Must use the executor path here: passing true would leave subTasks unused and fork the
        // halves instead of submitting them to the configured task executor.
        this.splitOrExecute(false);
        return null;
    }

    /**
     * Either performs the work directly or splits it into two halves and launches them.
     *
     * @param forkJoin {@code true} to run the halves with fork/join and wait for them;
     *                 {@code false} to submit them to the task executor without waiting
     */
    private void splitOrExecute(boolean forkJoin) {
        if (!forkJoin) {
            this.subTasks = new ArrayList<CPUCol2ImTask>();
        }
        if (this.parallelThreshold == Integer.MAX_VALUE || this.opSize() <= this.parallelThreshold) {
            this.execute();
            return;
        }

        CPUCol2ImTask first;
        CPUCol2ImTask second;
        int exampleCount = this.exampleTo - this.exampleFrom;
        int depthCount = this.depthTo - this.depthFrom;
        if (exampleCount > 1) {
            // Split along the example dimension.
            int mid = this.exampleFrom + exampleCount / 2;
            first = this.subTask(this.exampleFrom, mid, this.depthFrom, this.depthTo);
            second = this.subTask(mid, this.exampleTo, this.depthFrom, this.depthTo);
        } else if (depthCount > 1) {
            // At most one example left: split along the depth dimension instead.
            int mid = this.depthFrom + depthCount / 2;
            first = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, mid);
            second = this.subTask(this.exampleFrom, this.exampleTo, mid, this.depthTo);
        } else {
            // Nothing left to split on.
            this.execute();
            return;
        }

        if (forkJoin) {
            // Forks one half, runs the other on this thread, then joins.
            invokeAll(first, second);
        } else {
            // Both split kinds must be tracked, otherwise blockUntilComplete() could return
            // before the halves have finished writing into imgOut.
            first.invokeAsync();
            this.subTasks.add(first);
            second.invokeAsync();
            this.subTasks.add(second);
        }
    }

    /** Creates a task with this task's settings, restricted to the given example/depth ranges. */
    private CPUCol2ImTask subTask(int exFrom, int exTo, int dFrom, int dTo) {
        return new CPUCol2ImTask(this.col, this.imgOut, this.strideY, this.strideX, this.padHeight, this.padWidth, this.imgHeight, this.imgWidth, exFrom, exTo, dFrom, dTo, this.parallelThreshold);
    }

    /**
     * Number of element additions this task would perform if no patch cell were clipped.
     * Computed in {@code long} so that large problems cannot overflow into a negative size
     * (which would wrongly suppress splitting).
     */
    private long opSize() {
        return (long) (this.exampleTo - this.exampleFrom) * (this.depthTo - this.depthFrom) * this.col.size(4) * this.col.size(5) * this.kernelHeight * this.kernelWidth;
    }

    /**
     * Dispatches to the kernel matching the input buffer's allocation mode and data type.
     * Any data type other than FLOAT is handled by the double kernels.
     */
    private void execute() {
        DataBuffer dbIn = this.col.data();
        boolean isFloat = dbIn.dataType() == DataBuffer.Type.FLOAT;
        if (dbIn.allocationMode() == DataBuffer.AllocationMode.HEAP) {
            if (isFloat) {
                this.doHeapFloat();
            } else {
                this.doHeapDouble();
            }
        } else if (isFloat) {
            this.doDirectFloat();
        } else {
            this.doDirectDouble();
        }
    }

    /** col2im kernel for buffers backed by a {@code float[]}. */
    private void doHeapFloat() {
        DataBuffer dbCol = this.col.data();
        DataBuffer dbOut = this.imgOut.data();
        int outArrayOffset = this.imgOut.offset();
        int[] outShape = this.imgOut.shape();
        int[] outStride = this.imgOut.stride();
        int inOffset = this.col.offset();
        int[] inShape = this.col.shape();
        int[] inStride = this.col.stride();
        int[] outIndices = new int[4];
        int[] inIndices = new int[6];
        int inStride2 = inStride[2];
        int inStride3 = inStride[3];
        int outStride2 = outStride[2];
        int outStride3 = outStride[3];
        int outShape2 = outShape[2];
        int outShape3 = outShape[3];
        int yOutTo = inShape[4];
        int xOutTo = inShape[5];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        // Padding offsets are only applied when at least one of them is positive.
        int rowShift = padding ? this.padHeight : 0;
        int colShift = padding ? this.padWidth : 0;
        // Keep the kernel dimension with the smaller input stride in the inner loop.
        boolean kernelRowsInner = inStride2 <= inStride3;
        float[] fIn = (float[])dbCol.array();
        float[] fOut = (float[])dbOut.array();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = 0; x < xOutTo; ++x) {
                    for (int y = 0; y < yOutTo; ++y) {
                        inIndices[4] = y;
                        inIndices[5] = x;
                        int baseOffsetIn = CPUCol2ImTask.getOffsetUnsafe6(inOffset, inShape, inStride, inIndices);
                        // Top-left image cell of this patch; may be negative when padding is used.
                        int i = y * this.strideY - rowShift;
                        int j = x * this.strideX - colShift;
                        outIndices[2] = i;
                        outIndices[3] = j;
                        int baseOffsetOut = CPUCol2ImTask.getOffsetUnsafe4(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Patch cells may fall outside the image and must be skipped.
                            if (kernelRowsInner) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    if (j + patchX < 0 || j + patchX >= outShape3) {
                                        continue;
                                    }
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || i + patchY >= outShape2) {
                                            continue;
                                        }
                                        fOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += fIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    if (i + patchY < 0 || i + patchY >= outShape2) {
                                        continue;
                                    }
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (j + patchX < 0 || j + patchX >= outShape3) {
                                            continue;
                                        }
                                        fOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += fIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                    }
                                }
                            }
                        } else if (kernelRowsInner) {
                            for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    fOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += fIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                }
                            }
                        } else {
                            for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    fOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += fIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /** col2im kernel for buffers backed by a {@code double[]}. */
    private void doHeapDouble() {
        DataBuffer dbCol = this.col.data();
        DataBuffer dbOut = this.imgOut.data();
        int outArrayOffset = this.imgOut.offset();
        int[] outShape = this.imgOut.shape();
        int[] outStride = this.imgOut.stride();
        int inOffset = this.col.offset();
        int[] inShape = this.col.shape();
        int[] inStride = this.col.stride();
        int[] outIndices = new int[4];
        int[] inIndices = new int[6];
        int inStride2 = inStride[2];
        int inStride3 = inStride[3];
        int outStride2 = outStride[2];
        int outStride3 = outStride[3];
        int outShape2 = outShape[2];
        int outShape3 = outShape[3];
        int yOutTo = inShape[4];
        int xOutTo = inShape[5];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        // Padding offsets are only applied when at least one of them is positive.
        int rowShift = padding ? this.padHeight : 0;
        int colShift = padding ? this.padWidth : 0;
        // Keep the kernel dimension with the smaller input stride in the inner loop.
        boolean kernelRowsInner = inStride2 <= inStride3;
        double[] dIn = (double[])dbCol.array();
        double[] dOut = (double[])dbOut.array();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = 0; x < xOutTo; ++x) {
                    for (int y = 0; y < yOutTo; ++y) {
                        inIndices[4] = y;
                        inIndices[5] = x;
                        int baseOffsetIn = CPUCol2ImTask.getOffsetUnsafe6(inOffset, inShape, inStride, inIndices);
                        // Top-left image cell of this patch; may be negative when padding is used.
                        int i = y * this.strideY - rowShift;
                        int j = x * this.strideX - colShift;
                        outIndices[2] = i;
                        outIndices[3] = j;
                        int baseOffsetOut = CPUCol2ImTask.getOffsetUnsafe4(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Patch cells may fall outside the image and must be skipped.
                            if (kernelRowsInner) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    if (j + patchX < 0 || j + patchX >= outShape3) {
                                        continue;
                                    }
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || i + patchY >= outShape2) {
                                            continue;
                                        }
                                        dOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += dIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    if (i + patchY < 0 || i + patchY >= outShape2) {
                                        continue;
                                    }
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (j + patchX < 0 || j + patchX >= outShape3) {
                                            continue;
                                        }
                                        dOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += dIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                    }
                                }
                            }
                        } else if (kernelRowsInner) {
                            for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    dOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += dIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                }
                            }
                        } else {
                            for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    dOut[baseOffsetOut + patchY * outStride2 + patchX * outStride3] += dIn[baseOffsetIn + patchY * inStride2 + patchX * inStride3];
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * col2im kernel for non-heap float buffers, accessed through their Netty {@link ByteBuf}.
     * All offsets and strides are converted to byte units (4 bytes per element).
     */
    private void doDirectFloat() {
        DataBuffer dbCol = this.col.data();
        DataBuffer dbOut = this.imgOut.data();
        int outArrayOffset = this.imgOut.offset();
        int[] outShape = this.imgOut.shape();
        int[] outStride = this.imgOut.stride();
        int inOffset = this.col.offset();
        int[] inShape = this.col.shape();
        int[] inStride = this.col.stride();
        int[] outIndices = new int[4];
        int[] inIndices = new int[6];
        int inStride2Bytes = inStride[2] * 4;
        int inStride3Bytes = inStride[3] * 4;
        int outStride2Bytes = outStride[2] * 4;
        int outStride3Bytes = outStride[3] * 4;
        int outShape2 = outShape[2];
        int outShape3 = outShape[3];
        int yOutTo = inShape[4];
        int xOutTo = inShape[5];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        // Padding offsets are only applied when at least one of them is positive.
        int rowShift = padding ? this.padHeight : 0;
        int colShift = padding ? this.padWidth : 0;
        // Keep the kernel dimension with the smaller input stride in the inner loop.
        boolean kernelRowsInner = inStride2Bytes <= inStride3Bytes;
        ByteBuf nbbIn = dbCol.asNetty();
        ByteBuf nbbOut = dbOut.asNetty();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = 0; x < xOutTo; ++x) {
                    for (int y = 0; y < yOutTo; ++y) {
                        inIndices[4] = y;
                        inIndices[5] = x;
                        int baseOffsetInBytes = 4 * CPUCol2ImTask.getOffsetUnsafe6(inOffset, inShape, inStride, inIndices);
                        // Top-left image cell of this patch; may be negative when padding is used.
                        int i = y * this.strideY - rowShift;
                        int j = x * this.strideX - colShift;
                        outIndices[2] = i;
                        outIndices[3] = j;
                        int baseOffsetOutBytes = 4 * CPUCol2ImTask.getOffsetUnsafe4(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Patch cells may fall outside the image and must be skipped.
                            if (kernelRowsInner) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    if (j + patchX < 0 || j + patchX >= outShape3) {
                                        continue;
                                    }
                                    int outColumnBytes = baseOffsetOutBytes + patchX * outStride3Bytes;
                                    int inColumnBytes = baseOffsetInBytes + patchX * inStride3Bytes;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || i + patchY >= outShape2) {
                                            continue;
                                        }
                                        int byteOffset = outColumnBytes + patchY * outStride2Bytes;
                                        nbbOut.setFloat(byteOffset, nbbOut.getFloat(byteOffset) + nbbIn.getFloat(inColumnBytes + patchY * inStride2Bytes));
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    if (i + patchY < 0 || i + patchY >= outShape2) {
                                        continue;
                                    }
                                    int outRowBytes = baseOffsetOutBytes + patchY * outStride2Bytes;
                                    int inRowBytes = baseOffsetInBytes + patchY * inStride2Bytes;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (j + patchX < 0 || j + patchX >= outShape3) {
                                            continue;
                                        }
                                        int byteOffset = outRowBytes + patchX * outStride3Bytes;
                                        nbbOut.setFloat(byteOffset, nbbOut.getFloat(byteOffset) + nbbIn.getFloat(inRowBytes + patchX * inStride3Bytes));
                                    }
                                }
                            }
                        } else if (kernelRowsInner) {
                            for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                int outColumnBytes = baseOffsetOutBytes + patchX * outStride3Bytes;
                                int inColumnBytes = baseOffsetInBytes + patchX * inStride3Bytes;
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int byteOffset = outColumnBytes + patchY * outStride2Bytes;
                                    nbbOut.setFloat(byteOffset, nbbOut.getFloat(byteOffset) + nbbIn.getFloat(inColumnBytes + patchY * inStride2Bytes));
                                }
                            }
                        } else {
                            for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                int outRowBytes = baseOffsetOutBytes + patchY * outStride2Bytes;
                                int inRowBytes = baseOffsetInBytes + patchY * inStride2Bytes;
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int byteOffset = outRowBytes + patchX * outStride3Bytes;
                                    nbbOut.setFloat(byteOffset, nbbOut.getFloat(byteOffset) + nbbIn.getFloat(inRowBytes + patchX * inStride3Bytes));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * col2im kernel for non-heap double buffers, accessed through their Netty {@link ByteBuf}.
     * All offsets and strides are converted to byte units (8 bytes per element).
     */
    private void doDirectDouble() {
        DataBuffer dbCol = this.col.data();
        DataBuffer dbOut = this.imgOut.data();
        int outArrayOffset = this.imgOut.offset();
        int[] outShape = this.imgOut.shape();
        int[] outStride = this.imgOut.stride();
        int inOffset = this.col.offset();
        int[] inShape = this.col.shape();
        int[] inStride = this.col.stride();
        int[] outIndices = new int[4];
        int[] inIndices = new int[6];
        int inStride2Bytes = inStride[2] * 8;
        int inStride3Bytes = inStride[3] * 8;
        int outStride2Bytes = outStride[2] * 8;
        int outStride3Bytes = outStride[3] * 8;
        int outShape2 = outShape[2];
        int outShape3 = outShape[3];
        int yOutTo = inShape[4];
        int xOutTo = inShape[5];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        // Padding offsets are only applied when at least one of them is positive.
        int rowShift = padding ? this.padHeight : 0;
        int colShift = padding ? this.padWidth : 0;
        // Keep the kernel dimension with the smaller input stride in the inner loop.
        boolean kernelRowsInner = inStride2Bytes <= inStride3Bytes;
        ByteBuf nbbIn = dbCol.asNetty();
        ByteBuf nbbOut = dbOut.asNetty();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = 0; x < xOutTo; ++x) {
                    for (int y = 0; y < yOutTo; ++y) {
                        inIndices[4] = y;
                        inIndices[5] = x;
                        int baseOffsetInBytes = 8 * CPUCol2ImTask.getOffsetUnsafe6(inOffset, inShape, inStride, inIndices);
                        // Top-left image cell of this patch; may be negative when padding is used.
                        int i = y * this.strideY - rowShift;
                        int j = x * this.strideX - colShift;
                        outIndices[2] = i;
                        outIndices[3] = j;
                        int baseOffsetOutBytes = 8 * CPUCol2ImTask.getOffsetUnsafe4(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Patch cells may fall outside the image and must be skipped.
                            if (kernelRowsInner) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    if (j + patchX < 0 || j + patchX >= outShape3) {
                                        continue;
                                    }
                                    int outColumnBytes = baseOffsetOutBytes + patchX * outStride3Bytes;
                                    int inColumnBytes = baseOffsetInBytes + patchX * inStride3Bytes;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || i + patchY >= outShape2) {
                                            continue;
                                        }
                                        int byteOffset = outColumnBytes + patchY * outStride2Bytes;
                                        nbbOut.setDouble(byteOffset, nbbOut.getDouble(byteOffset) + nbbIn.getDouble(inColumnBytes + patchY * inStride2Bytes));
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    if (i + patchY < 0 || i + patchY >= outShape2) {
                                        continue;
                                    }
                                    int outRowBytes = baseOffsetOutBytes + patchY * outStride2Bytes;
                                    int inRowBytes = baseOffsetInBytes + patchY * inStride2Bytes;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (j + patchX < 0 || j + patchX >= outShape3) {
                                            continue;
                                        }
                                        int byteOffset = outRowBytes + patchX * outStride3Bytes;
                                        nbbOut.setDouble(byteOffset, nbbOut.getDouble(byteOffset) + nbbIn.getDouble(inRowBytes + patchX * inStride3Bytes));
                                    }
                                }
                            }
                        } else if (kernelRowsInner) {
                            for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                int outColumnBytes = baseOffsetOutBytes + patchX * outStride3Bytes;
                                int inColumnBytes = baseOffsetInBytes + patchX * inStride3Bytes;
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int byteOffset = outColumnBytes + patchY * outStride2Bytes;
                                    nbbOut.setDouble(byteOffset, nbbOut.getDouble(byteOffset) + nbbIn.getDouble(inColumnBytes + patchY * inStride2Bytes));
                                }
                            }
                        } else {
                            for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                int outRowBytes = baseOffsetOutBytes + patchY * outStride2Bytes;
                                int inRowBytes = baseOffsetInBytes + patchY * inStride2Bytes;
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int byteOffset = outRowBytes + patchX * outStride3Bytes;
                                    nbbOut.setDouble(byteOffset, nbbOut.getDouble(byteOffset) + nbbIn.getDouble(inRowBytes + patchX * inStride3Bytes));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Element offset of {@code indices} in a 4-d array, without bounds checks.
     *
     * <p>Dimensions 0 and 1 are skipped when their size is 1 (their index is then 0). Dimensions 2
     * and 3 are always applied: with padding the start index can be negative even for a size-1
     * dimension, and the kernels add {@code patch * stride} on top of the value returned here, so
     * the two terms must use the same stride to cancel out correctly.
     *
     * @param baseOffset offset of the array's first element in its buffer
     * @param shape      array shape (length 4)
     * @param stride     array strides (length 4)
     * @param indices    indices to resolve (length 4); entries 2 and 3 may be negative
     * @return element offset into the underlying buffer
     */
    private static int getOffsetUnsafe4(int baseOffset, int[] shape, int[] stride, int[] indices) {
        int offset = baseOffset;
        if (shape[0] != 1) {
            offset += indices[0] * stride[0];
        }
        if (shape[1] != 1) {
            offset += indices[1] * stride[1];
        }
        offset += indices[2] * stride[2];
        offset += indices[3] * stride[3];
        return offset;
    }

    /**
     * Element offset of the patch origin in the 6-d column array, without bounds checks.
     * Only dimensions 0, 1, 4 and 5 contribute; the kernel dimensions 2 and 3 are added by the
     * callers while walking the patch. Size-1 dimensions are skipped.
     *
     * @param baseOffset offset of the array's first element in its buffer
     * @param shape      array shape (length 6)
     * @param stride     array strides (length 6)
     * @param indices    indices to resolve (length 6); entries 2 and 3 are ignored
     * @return element offset into the underlying buffer
     */
    private static int getOffsetUnsafe6(int baseOffset, int[] shape, int[] stride, int[] indices) {
        int offset = baseOffset;
        if (shape[0] != 1) {
            offset += indices[0] * stride[0];
        }
        if (shape[1] != 1) {
            offset += indices[1] * stride[1];
        }
        if (shape[4] != 1) {
            offset += indices[4] * stride[4];
        }
        if (shape[5] != 1) {
            offset += indices[5] * stride[5];
        }
        return offset;
    }

    /**
     * Submits this task to the task executor and waits for it and all of its sub-tasks.
     *
     * @return the output image array
     */
    @Override
    public INDArray invokeBlocking() {
        this.invokeAsync();
        return this.blockUntilComplete();
    }

    /** Submits this task to the executor supplied by {@link TaskExecutorProvider}. */
    @Override
    public void invokeAsync() {
        this.future = TaskExecutorProvider.getTaskExecutor().executeAsync(this);
    }

    /**
     * Waits for this task's future and then, recursively, for every recorded sub-task.
     *
     * @return the output image array
     * @throws RuntimeException if waiting fails or is interrupted; the cause is preserved and the
     *                          thread's interrupt flag is restored on interruption
     */
    @Override
    public INDArray blockUntilComplete() {
        try {
            this.future.get();
        }
        catch (InterruptedException e) {
            // Preserve the interruption for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
        if (this.subTasks != null) {
            for (CPUCol2ImTask task : this.subTasks) {
                task.blockUntilComplete();
            }
        }
        return this.imgOut;
    }
}

