/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.hive.vector;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.io.api.LlapProxy;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.SyntheticFileId;
import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hive.iceberg.org.apache.orc.OrcConf;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileContent;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.hive.HiveIcebergInputFormat;
import org.apache.iceberg.mr.hive.vector.HiveBatchContext;
import org.apache.iceberg.mr.hive.vector.HiveBatchIterator;
import org.apache.iceberg.mr.hive.vector.HiveDeleteFilter;
import org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor;
import org.apache.iceberg.mr.mapred.MapredIcebergInputFormat;
import org.apache.iceberg.orc.VectorizedReadUtils;
import org.apache.iceberg.parquet.ParquetFooterInputFromCache;
import org.apache.iceberg.parquet.ParquetSchemaUtil;
import org.apache.iceberg.parquet.TypeWithSchemaVisitor;
import org.apache.iceberg.types.Types;
import org.apache.orc.impl.OrcTail;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;

public class HiveVectorizedReader {
    /** Private constructor: this class only exposes static members and is not meant to be instantiated. */
    private HiveVectorizedReader() {
    }

    /**
     * Creates an iterable of vectorized row batches for a single file scan task, backed by a
     * Hive vectorized ORC or Parquet record reader.
     *
     * <p>For partitioned tables, every identity-transformed partition field whose source column
     * is found in the partition spec's schema is removed from the Hive read-column ids stored in
     * the job configuration; its column index and its value from {@code idToConstant} are handed
     * to the batch iterator instead. When the task carries delete files, the resulting batches
     * are passed through a {@link HiveDeleteFilter}.
     *
     * @param table the table being read; supplies the file IO, the table schema and the table name
     * @param path location of the data file to read
     * @param task scan task describing the data file, the byte range to read and its delete files
     * @param idToConstant constant values keyed by field id; looked up by partition source id
     * @param context task attempt context; it is cast to
     *     {@code MapredIcebergInputFormat.CompatibilityTaskAttemptContextImpl} to obtain the
     *     legacy {@link Reporter}, so any other implementation fails with a ClassCastException
     * @param residual residual filter expression, forwarded to the ORC reader setup only
     * @param readSchema projected schema; used as the base of the delete filter's schema
     * @return the batches of the task, filtered by the delete filter when the task has deletes
     * @throws UnsupportedOperationException if the task has equality deletes, or if the file
     *     format is neither ORC nor PARQUET
     * @throws RuntimeException wrapping any {@link IOException} raised while creating the reader
     */
    public static CloseableIterable<HiveBatchContext> reader(Table table, Path path, FileScanTask task, Map<Integer, ?> idToConstant, TaskAttemptContext context, Expression residual, Schema readSchema) {
        HiveDeleteFilter deleteFilter = null;
        if (!task.deletes().isEmpty()) {
            deleteFilter = new HiveDeleteFilter(table.io(), task, table.schema(), HiveVectorizedReader.prepareSchemaForDeleteFilter(readSchema), context.getConfiguration());
            if (task.deletes().stream().anyMatch(d -> d.content() == FileContent.EQUALITY_DELETES)) {
                throw new UnsupportedOperationException("Vectorized reading with equality deletes is not supported yet.");
            }
        }

        // Work on a private copy of the configuration: the projection and file id set below
        // must not leak into the caller's configuration.
        JobConf job = new JobConf(context.getConfiguration());
        FileFormat format = task.file().format();
        Reporter reporter = ((MapredIcebergInputFormat.CompatibilityTaskAttemptContextImpl) context).getLegacyReporter();

        int[] partitionColIndices = null;
        Object[] partitionValues = null;
        PartitionSpec partitionSpec = task.spec();
        List<Integer> readColumnIds = ColumnProjectionUtils.getReadColumnIDs(job);
        if (!partitionSpec.isUnpartitioned()) {
            List<Types.NestedField> columns = partitionSpec.schema().columns();
            List<Integer> partitionColIndicesList = new ArrayList<>();
            List<Object> partitionValuesList = new ArrayList<>();
            for (PartitionField partitionField : partitionSpec.fields()) {
                if (!partitionField.transform().isIdentity()) {
                    continue;
                }
                for (int colIdx = 0; colIdx < columns.size(); ++colIdx) {
                    if (columns.get(colIdx).fieldId() == partitionField.sourceId()) {
                        // Box explicitly so that List.remove(Object) is selected: the column id
                        // equal to colIdx is removed, not the element at position colIdx.
                        readColumnIds.remove(Integer.valueOf(colIdx));
                        partitionColIndicesList.add(colIdx);
                        partitionValuesList.add(idToConstant.get(partitionField.sourceId()));
                        // At most one column is matched per partition field.
                        break;
                    }
                }
            }
            partitionColIndices = ArrayUtils.toPrimitive(partitionColIndicesList.toArray(new Integer[0]));
            partitionValues = partitionValuesList.toArray(new Object[0]);
            ColumnProjectionUtils.setReadColumns(job, readColumnIds);
        }

        try {
            long start = task.start();
            long length = task.length();
            SyntheticFileId fileId = new SyntheticFileId(path, task.file().fileSizeInBytes(), Long.MIN_VALUE);
            fileId.toJobConf(job);

            RecordReader<NullWritable, VectorizedRowBatch> recordReader;
            switch (format) {
                case ORC: {
                    recordReader = HiveVectorizedReader.orcRecordReader(job, reporter, task, path, start, length, readColumnIds, fileId, residual, table.name());
                    break;
                }
                case PARQUET: {
                    recordReader = HiveVectorizedReader.parquetRecordReader(job, reporter, task, path, start, length, fileId);
                    break;
                }
                default: {
                    throw new UnsupportedOperationException("Vectorized Hive reading unimplemented for format: " + format);
                }
            }

            CloseableIterable<HiveBatchContext> vrbIterable = HiveVectorizedReader.createVectorizedRowBatchIterable(recordReader, job, partitionColIndices, partitionValues, idToConstant);
            return deleteFilter != null ? deleteFilter.filterBatch(vrbIterable) : vrbIterable;
        }
        catch (IOException ioe) {
            throw new RuntimeException("Error creating vectorized record reader for " + path, ioe);
        }
    }

    /**
     * Builds the vectorized record reader for an ORC data file.
     *
     * <p>Positional schema evolution is switched off in {@code job}, the ORC tail is loaded and
     * the Iceberg projection is applied to the job configuration. An LLAP reader is requested
     * only when LLAP IO is enabled and available, the task has no delete files and virtual
     * columns are not being fetched; if that path is not taken or yields {@code null}, a plain
     * {@link VectorizedOrcInputFormat} reader over an {@link OrcSplit} is returned.
     *
     * @param job job configuration; modified by this method
     * @param reporter reporter handed to the underlying reader
     * @param task scan task, used for the projection setup and to check for delete files
     * @param path location of the ORC file
     * @param start first byte of the range to read
     * @param length number of bytes to read
     * @param readColumnIds column ids passed to the LLAP reader
     * @param fileId synthetic id of the file, used for the ORC tail lookup and by both readers
     * @param residual residual expression passed to the projection setup
     * @param tableName table name used to look up the per-table vectorization setting
     * @return a vectorized ORC record reader
     * @throws IOException declared for failures of the underlying calls
     */
    private static RecordReader<NullWritable, VectorizedRowBatch> orcRecordReader(JobConf job, Reporter reporter, FileScanTask task, Path path, long start, long length, List<Integer> readColumnIds, SyntheticFileId fileId, Expression residual, String tableName) throws IOException {
        job.setBoolean(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), false);

        // The serialized tail is deserialized twice: once for the OrcSplit below and once in its
        // shaded form, whose schema drives the Iceberg projection.
        ByteBuffer tailBytes = VectorizedReadUtils.getSerializedOrcTail(path, fileId, job);
        OrcTail orcTail = VectorizedReadUtils.deserializeToOrcTail(tailBytes);
        VectorizedReadUtils.handleIcebergProjection(task, job, VectorizedReadUtils.deserializeToShadedOrcTail(tailBytes).getSchema(), residual);

        boolean llapIoEnabled = HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon());
        if (llapIoEnabled && LlapProxy.getIo() != null && task.deletes().isEmpty() && !InputFormatConfig.fetchVirtualColumns(job)) {
            if (job.getBoolean(HiveIcebergInputFormat.getVectorizationConfName(tableName), false)) {
                HiveConf.setVar(job, HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED, "");
            }
            RecordReader<NullWritable, VectorizedRowBatch> llapReader = LlapProxy.getIo().llapVectorizedOrcReaderForPath(fileId, path, null, readColumnIds, job, start, length, reporter);
            if (llapReader != null) {
                return llapReader;
            }
        }

        // No LLAP reader: read the file directly through the vectorized ORC input format.
        OrcSplit split = new OrcSplit(path, fileId, start, length, (String[]) null, orcTail, false, false, Lists.newArrayList(), 0L, length, path.getParent(), null);
        return new VectorizedOrcInputFormat().getRecordReader(split, job, reporter);
    }

    /**
     * Builds the vectorized record reader for a Parquet data file.
     *
     * <p>The Parquet footer is taken from the LLAP cache when LLAP IO is enabled, available and
     * using its low-level cache, and the cache returns footer buffers; otherwise it is read from
     * the file. The file schema is pruned against the partition spec's schema (using fallback
     * ids when the file schema carries none), and the column names collected by
     * {@link ParquetSchemaFieldNameVisitor} are stored under the {@code "columns"} key of
     * {@code job} before the reader is created.
     *
     * @param job job configuration; modified by this method
     * @param reporter reporter handed to the underlying reader
     * @param task scan task whose partition spec supplies the expected schema
     * @param path location of the Parquet file
     * @param start first byte of the range to read
     * @param length number of bytes to read
     * @param fileId synthetic id of the file, used for the footer cache lookup
     * @return a vectorized Parquet record reader
     * @throws IOException declared for failures of the underlying calls
     */
    private static RecordReader<NullWritable, VectorizedRowBatch> parquetRecordReader(JobConf job, Reporter reporter, FileScanTask task, Path path, long start, long length, SyntheticFileId fileId) throws IOException {
        FileSplit split = new FileSplit(path, start, length, job);
        VectorizedParquetInputFormat parquetInputFormat = new VectorizedParquetInputFormat();

        MemoryBufferOrBuffers cachedFooter = null;
        boolean llapIoEnabled = HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon());
        if (llapIoEnabled && LlapProxy.getIo() != null && LlapProxy.getIo().usingLowLevelCache()) {
            LlapProxy.getIo().initCacheOnlyInputFormat(parquetInputFormat);
            cachedFooter = LlapProxy.getIo().getParquetFooterBuffersFromCache(path, job, fileId);
        }

        ParquetMetadata footer;
        if (cachedFooter != null) {
            footer = ParquetFileReader.readFooter(new ParquetFooterInputFromCache(cachedFooter), ParquetMetadataConverter.NO_FILTER);
        } else {
            footer = ParquetFileReader.readFooter(job, path);
        }
        parquetInputFormat.setMetadata(footer);

        MessageType fileSchema = footer.getFileMetaData().getSchema();
        Schema expectedSchema = task.spec().schema();
        MessageType typeWithIds;
        if (ParquetSchemaUtil.hasIds(fileSchema)) {
            typeWithIds = ParquetSchemaUtil.pruneColumns(fileSchema, expectedSchema);
        } else {
            typeWithIds = ParquetSchemaUtil.pruneColumnsFallback(ParquetSchemaUtil.addFallbackIds(fileSchema), expectedSchema);
        }

        ParquetSchemaFieldNameVisitor nameVisitor = new ParquetSchemaFieldNameVisitor(fileSchema);
        TypeWithSchemaVisitor.visit(expectedSchema.asStruct(), typeWithIds, nameVisitor);
        job.set("columns", nameVisitor.retrieveColumnNameList());

        return parquetInputFormat.getRecordReader(split, job, reporter);
    }

    /**
     * Wraps a Hive vectorized record reader in a {@link CloseableIterable}.
     *
     * <p>A single {@link HiveBatchIterator} is created up front; every call to
     * {@code iterator()} on the returned iterable returns that same instance, and closing the
     * iterable closes it.
     *
     * @param hiveRecordReader reader producing the vectorized row batches
     * @param job job configuration passed to the iterator
     * @param partitionColIndices partition column indices passed to the iterator, may be null
     * @param partitionValues partition values passed to the iterator, may be null
     * @param idToConstant constant values keyed by field id, passed to the iterator
     * @return an iterable view over the single batch iterator
     */
    private static CloseableIterable<HiveBatchContext> createVectorizedRowBatchIterable(RecordReader<NullWritable, VectorizedRowBatch> hiveRecordReader, JobConf job, int[] partitionColIndices, Object[] partitionValues, Map<Integer, ?> idToConstant) {
        final HiveBatchIterator batchIterator = new HiveBatchIterator(hiveRecordReader, job, partitionColIndices, partitionValues, idToConstant);

        return new CloseableIterable<HiveBatchContext>() {
            @Override
            public CloseableIterator<HiveBatchContext> iterator() {
                return batchIterator;
            }

            @Override
            public void close() throws IOException {
                batchIterator.close();
            }
        };
    }

    /**
     * Returns a new schema made of the columns of {@code schema} followed by the
     * {@code MetadataColumns.IS_DELETED} column. The given schema is not modified.
     *
     * @param schema schema whose columns are copied
     * @return a new schema with {@code IS_DELETED} appended as the last column
     */
    private static Schema prepareSchemaForDeleteFilter(Schema schema) {
        List<Types.NestedField> withIsDeleted = new ArrayList<>(schema.columns());
        withIsDeleted.add(MetadataColumns.IS_DELETED);
        return new Schema(withIsDeleted);
    }
}

