/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.mapreduce;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.ContentScanTask;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataTableScan;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.SerializableTable;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.SystemConfigs;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.data.GenericDeleteFilter;
import org.apache.iceberg.data.IdentityPartitionConverters;
import org.apache.iceberg.data.InternalRecordWrapper;
import org.apache.iceberg.data.avro.DataReader;
import org.apache.iceberg.data.orc.GenericOrcReader;
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.encryption.EncryptedFiles;
import org.apache.iceberg.encryption.EncryptionManager;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hive.HiveVersion;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandler;
import org.apache.iceberg.mr.mapreduce.IcebergSplit;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.PartitionUtil;
import org.apache.iceberg.util.SerializationUtil;
import org.apache.iceberg.util.ThreadPools;
import org.apache.orc.TypeDescription;
import org.apache.parquet.schema.MessageType;

public class IcebergInputFormat<T>
extends InputFormat<Void, T> {
    /**
     * Registers this class as the input format of {@code job} and returns a builder over the
     * job's configuration for setting further read options.
     *
     * @param job the MapReduce job to configure
     * @return a {@link InputFormatConfig.ConfigBuilder} wrapping {@code job.getConfiguration()}
     */
    public static InputFormatConfig.ConfigBuilder configure(Job job) {
        job.setInputFormatClass(IcebergInputFormat.class);
        Configuration jobConf = job.getConfiguration();
        return new InputFormatConfig.ConfigBuilder(jobConf);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Resolves the table to read and plans its input splits on a dedicated worker pool.
     *
     * <p>The table is taken from {@link HiveIcebergStorageHandler#table} when that lookup yields
     * a non-null result, and loaded through {@link Catalogs#loadTable} otherwise. The planning
     * pool is always shut down before this method returns, whether planning succeeded or threw.
     *
     * @param context job context supplying the configuration
     * @return the planned splits
     */
    public List<InputSplit> getSplits(JobContext context) {
        Configuration conf = context.getConfiguration();
        Table table = HiveIcebergStorageHandler.table(conf, conf.get("iceberg.mr.table.identifier"));
        if (table == null) {
            table = Catalogs.loadTable(conf);
        }
        int poolSize = conf.getInt(SystemConfigs.WORKER_THREAD_POOL_SIZE.propertyKey(), ThreadPools.WORKER_THREAD_POOL_SIZE);
        ExecutorService planningPool = ThreadPools.newFixedThreadPool("iceberg-plan-worker-pool", poolSize);
        try {
            return this.planInputSplits(table, conf, planningPool);
        }
        finally {
            planningPool.shutdown();
        }
    }

    /**
     * Plans the scan described by {@code conf} and wraps every planned task in an
     * {@link IcebergSplit}.
     *
     * <p>When residual filtering is not skipped and the in-memory data model is HIVE or PIG,
     * each task is checked with {@link #checkResiduals} so that planning fails if a file would
     * still need row-level filtering.
     *
     * @param table the table to scan
     * @param conf job configuration carrying the scan options
     * @param workerPool executor used for scan planning
     * @return one split per planned {@link CombinedScanTask}
     * @throws UncheckedIOException if closing the planned-task iterable fails
     */
    private List<InputSplit> planInputSplits(Table table, Configuration conf, ExecutorService workerPool) {
        TableScan scan = IcebergInputFormat.createTableScan(table, conf).planWith(workerPool);
        boolean applyResidual = !conf.getBoolean("skip.residual.filtering", false);
        InputFormatConfig.InMemoryDataModel model = conf.getEnum("iceberg.mr.in.memory.data.model", InputFormatConfig.InMemoryDataModel.GENERIC);
        boolean mustRejectResiduals = applyResidual && (model == InputFormatConfig.InMemoryDataModel.HIVE || model == InputFormatConfig.InMemoryDataModel.PIG);
        List<InputSplit> splits = Lists.newArrayList();
        try (CloseableIterable<CombinedScanTask> tasksIterable = scan.planTasks()) {
            // Every split carries the same serializable copy of the table.
            Table serializableTable = SerializableTable.copyOf(table);
            for (CombinedScanTask task : tasksIterable) {
                if (mustRejectResiduals) {
                    IcebergInputFormat.checkResiduals(task);
                }
                splits.add(new IcebergSplit(serializableTable, conf, task));
            }
        }
        catch (IOException e) {
            throw new UncheckedIOException(String.format("Failed to close table scan: %s", scan), e);
        }
        if (scan instanceof DataTableScan) {
            HiveIcebergStorageHandler.checkAndSkipIoConfigSerialization(conf, table);
        }
        return splits;
    }

    /**
     * Builds the table scan from the read options found in {@code conf}: case sensitivity,
     * snapshot id, as-of time, split size, projection and filter expression.
     *
     * <p>Scan refinement methods return a new scan, so every result is assigned back. The
     * read schema takes precedence over the selected columns, which is the same precedence
     * {@code IcebergRecordReader.readSchema} applies on the read side.
     */
    private static TableScan createTableScan(Table table, Configuration conf) {
        TableScan scan = table.newScan().caseSensitive(conf.getBoolean("iceberg.mr.case.sensitive", true));
        long snapshotId = conf.getLong("iceberg.mr.snapshot.id", -1L);
        if (snapshotId != -1L) {
            scan = scan.useSnapshot(snapshotId);
        }
        long asOfTime = conf.getLong("iceberg.mr.as.of.time", -1L);
        if (asOfTime != -1L) {
            scan = scan.asOfTime(asOfTime);
        }
        long splitSize = conf.getLong("iceberg.mr.split.size", 0L);
        if (splitSize > 0L) {
            scan = scan.option("read.split.target-size", String.valueOf(splitSize));
        }
        String schemaStr = conf.get("iceberg.mr.read.schema");
        String[] selectedColumns = conf.getStrings("iceberg.mr.selected.columns");
        if (schemaStr != null) {
            // The refined scan was previously discarded, so the projection never took effect.
            scan = scan.project(SchemaParser.fromJson(schemaStr));
        } else if (selectedColumns != null) {
            // NOTE(review): select is applied only without an explicit read schema; the scan API
            // may reject combining a projection schema with selected columns - confirm.
            scan = scan.select(selectedColumns);
        }
        Expression filter = SerializationUtil.deserializeFromBase64(conf.get("iceberg.mr.filter.expression"));
        if (filter != null) {
            scan = scan.filter(filter);
        }
        return scan;
    }

    /**
     * Verifies that no file in {@code task} is left with a residual filter.
     *
     * @param task the combined task whose file tasks are inspected
     * @throws UnsupportedOperationException if any file task has a residual that is neither
     *     {@code null} nor equal to {@link Expressions#alwaysTrue()}
     */
    private static void checkResiduals(CombinedScanTask task) {
        for (FileScanTask fileScanTask : task.files()) {
            Expression residual = fileScanTask.residual();
            boolean fullySatisfied = residual == null || residual.equals(Expressions.alwaysTrue());
            if (!fullySatisfied) {
                throw new UnsupportedOperationException(String.format("Filter expression %s is not completely satisfied. Additional rows can be returned not satisfied by the filter expression", residual));
            }
        }
    }

    /**
     * Creates a new, uninitialized record reader. Neither argument is used here; the reader
     * receives its split and context later through {@code initialize}.
     *
     * @param split the split to read (unused by this method)
     * @param context the task attempt context (unused by this method)
     * @return a fresh {@code IcebergRecordReader}
     */
    public RecordReader<Void, T> createRecordReader(InputSplit split, TaskAttemptContext context) {
        // Diamond instead of the raw type so the result is checked against RecordReader<Void, T>.
        return new IcebergRecordReader<>();
    }

    private static final class IcebergRecordReader<T>
    extends RecordReader<Void, T> {
        // Fully-qualified name of the Hive vectorized reader; the same literal is passed to impl() below.
        private static final String HIVE_VECTORIZED_READER_CLASS = "org.apache.iceberg.mr.hive.vector.HiveVectorizedReader";
        // Handle to the static "reader" method of the vectorized reader class, built only when
        // HiveVersion.min(HIVE_3) is true; null otherwise.
        private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER = HiveVersion.min((HiveVersion)HiveVersion.HIVE_3) ? DynMethods.builder((String)"reader").impl("org.apache.iceberg.mr.hive.vector.HiveVectorizedReader", new Class[]{InputFile.class, FileScanTask.class, Map.class, TaskAttemptContext.class}).buildStatic() : null;
        // Task attempt context passed to initialize(); used for progress and configuration lookups.
        private TaskAttemptContext context;
        // Table schema obtained from the configuration via InputFormatConfig.tableSchema.
        private Schema tableSchema;
        // Schema to read, resolved by readSchema(conf, tableSchema, caseSensitive).
        private Schema expectedSchema;
        // Value of the table property "schema.name-mapping.default"; may be null.
        private String nameMapping;
        // Value of "iceberg.mr.reuse.containers" (default false).
        private boolean reuseContainers;
        // Value of "iceberg.mr.case.sensitive" (default true).
        private boolean caseSensitive;
        // Value of "iceberg.mr.in.memory.data.model" (default GENERIC).
        private InputFormatConfig.InMemoryDataModel inMemoryDataModel;
        // Remaining file scan tasks of the split's combined task.
        private Iterator<FileScanTask> tasks;
        // Record most recently produced by nextKeyValue().
        private T current;
        // Iterator over the records of the file scan task currently being read.
        private CloseableIterator<T> currentIterator;
        // File IO and encryption manager taken from the split's table.
        private FileIO io;
        private EncryptionManager encryptionManager;

        // All state is populated in initialize(); the constructor does nothing.
        private IcebergRecordReader() {
        }

        /**
         * Prepares the reader for the given split: captures the table's IO and encryption
         * manager, resolves schemas and read options from the configuration, and opens the
         * first file scan task of the split.
         *
         * @param split the split to read; must be an {@link IcebergSplit}
         * @param newContext the task attempt context supplying the configuration
         */
        public void initialize(InputSplit split, TaskAttemptContext newContext) {
            Configuration conf = newContext.getConfiguration();
            IcebergSplit icebergSplit = (IcebergSplit)split;
            CombinedScanTask combinedTask = icebergSplit.task();
            this.context = newContext;

            Table table = icebergSplit.table();
            // The IO configuration is set on the table before its FileIO is obtained.
            HiveIcebergStorageHandler.checkAndSetIoConfig(conf, table);
            this.io = table.io();
            this.encryptionManager = table.encryption();
            this.tasks = combinedTask.files().iterator();

            this.tableSchema = InputFormatConfig.tableSchema(conf);
            this.nameMapping = table.properties().get("schema.name-mapping.default");
            this.caseSensitive = conf.getBoolean("iceberg.mr.case.sensitive", true);
            this.expectedSchema = IcebergRecordReader.readSchema(conf, this.tableSchema, this.caseSensitive);
            this.reuseContainers = conf.getBoolean("iceberg.mr.reuse.containers", false);
            this.inMemoryDataModel = conf.getEnum("iceberg.mr.in.memory.data.model", InputFormatConfig.InMemoryDataModel.GENERIC);

            FileScanTask firstTask = this.tasks.next();
            this.currentIterator = this.open(firstTask, this.expectedSchema).iterator();
        }

        /**
         * Advances to the next record, moving on to the next file scan task whenever the
         * current one is exhausted.
         *
         * @return {@code true} if a record is available through {@code getCurrentValue()},
         *     {@code false} once every task has been drained
         * @throws IOException if closing an exhausted iterator fails
         */
        public boolean nextKeyValue() throws IOException {
            while (!this.currentIterator.hasNext()) {
                // The exhausted iterator is closed whether or not another task follows.
                this.currentIterator.close();
                if (!this.tasks.hasNext()) {
                    return false;
                }
                this.currentIterator = this.open(this.tasks.next(), this.expectedSchema).iterator();
            }
            this.current = this.currentIterator.next();
            return true;
        }

        /**
         * Returns the current key. The key type is {@code Void}, so this is always {@code null}.
         *
         * @return {@code null}
         */
        public Void getCurrentKey() {
            return null;
        }

        /**
         * Returns the record stored by the most recent successful {@code nextKeyValue()} call.
         *
         * @return the current record
         */
        public T getCurrentValue() {
            return current;
        }

        /**
         * Reports progress by delegating to the task attempt context given to {@code initialize}.
         *
         * @return the progress value reported by the context
         */
        public float getProgress() {
            float progress = context.getProgress();
            return progress;
        }

        /**
         * Closes the iterator over the file scan task currently being read.
         *
         * <p>Safe to call when {@code initialize} never assigned an iterator (for example
         * because it threw first): there is nothing to close then, and throwing a
         * {@code NullPointerException} here would hide the original failure.
         *
         * @throws IOException if closing the current iterator fails
         */
        public void close() throws IOException {
            if (this.currentIterator != null) {
                this.currentIterator.close();
            }
        }

        /**
         * Opens the data file of {@code currentTask} with the reader matching its file format.
         *
         * @param currentTask the file scan task to open
         * @param readSchema the schema to read with
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException if the file format is not AVRO, ORC or PARQUET
         */
        private CloseableIterable<T> openTask(FileScanTask currentTask, Schema readSchema) {
            DataFile dataFile = currentTask.file();
            InputFile rawInput = this.io.newInputFile(dataFile.path().toString());
            // The file is wrapped with its key metadata and passed through the encryption manager.
            InputFile inputFile = this.encryptionManager.decrypt(EncryptedFiles.encryptedInput(rawInput, dataFile.keyMetadata()));
            switch (dataFile.format()) {
                case AVRO: {
                    return this.newAvroIterable(inputFile, currentTask, readSchema);
                }
                case ORC: {
                    return this.newOrcIterable(inputFile, currentTask, readSchema);
                }
                case PARQUET: {
                    return this.newParquetIterable(inputFile, currentTask, readSchema);
                }
                default: {
                    throw new UnsupportedOperationException(String.format("Cannot read %s file: %s", dataFile.format().name(), dataFile.path()));
                }
            }
        }

        /**
         * Opens {@code currentTask} according to the configured in-memory data model.
         *
         * <p>HIVE opens the task directly. GENERIC builds a {@link GenericDeleteFilter}, reads
         * the task with the schema the filter requires, and passes the records through the
         * filter. PIG is rejected.
         *
         * @param currentTask the file scan task to open
         * @param readSchema the schema requested by the reader
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException for the PIG model or an unknown model
         */
        private CloseableIterable<T> open(FileScanTask currentTask, Schema readSchema) {
            switch (this.inMemoryDataModel) {
                case HIVE: {
                    return this.openTask(currentTask, readSchema);
                }
                case GENERIC: {
                    GenericDeleteFilter deleteFilter = new GenericDeleteFilter(this.io, currentTask, this.tableSchema, readSchema);
                    return deleteFilter.filter(this.openTask(currentTask, deleteFilter.requiredSchema()));
                }
                case PIG: {
                    throw new UnsupportedOperationException("Pig and Hive object models are not supported.");
                }
                default: {
                    throw new UnsupportedOperationException("Unsupported memory model");
                }
            }
        }

        /**
         * Filters {@code iter} by the residual expression unless residual filtering is switched
         * off through "skip.residual.filtering" or there is nothing to filter on.
         *
         * @param iter the records to filter
         * @param residual the residual expression of the task; may be {@code null}
         * @param readSchema the schema the records were read with
         * @return {@code iter} itself when no filtering applies, otherwise a filtered view of it
         */
        private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual, Schema readSchema) {
            boolean skipResidual = this.context.getConfiguration().getBoolean("skip.residual.filtering", false);
            if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
                return iter;
            }
            InternalRecordWrapper wrapper = new InternalRecordWrapper(readSchema.asStruct());
            Evaluator evaluator = new Evaluator(readSchema.asStruct(), residual, this.caseSensitive);
            return CloseableIterable.filter(iter, record -> evaluator.eval((StructLike)wrapper.wrap((StructLike)record)));
        }

        /**
         * Builds an Avro reader over the split range of {@code task} and applies residual
         * filtering to its records.
         *
         * @param inputFile the file to read
         * @param task the file scan task supplying the split range and residual
         * @param readSchema the projection to read
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException for the PIG and HIVE data models
         */
        private CloseableIterable<T> newAvroIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            Avro.ReadBuilder builder = Avro.read(inputFile).project(readSchema).split(task.start(), task.length());
            if (this.reuseContainers) {
                builder.reuseContainers();
            }
            if (this.nameMapping != null) {
                builder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
            }
            switch (this.inMemoryDataModel) {
                case PIG:
                case HIVE: {
                    throw new UnsupportedOperationException("Avro support not yet supported for Pig and Hive");
                }
                case GENERIC: {
                    builder.createReaderFunc((icebergSchema, avroSchema) -> DataReader.create(icebergSchema, avroSchema, this.constantsMap(task, IdentityPartitionConverters::convertConstant)));
                    break;
                }
                default: {
                    break;
                }
            }
            CloseableIterable<T> records = builder.build();
            return this.applyResidualFiltering(records, task.residual(), readSchema);
        }

        /**
         * Builds a Parquet reader for {@code task} and applies residual filtering to its records.
         *
         * <p>HIVE uses the vectorized reader, which is only available when
         * {@code HiveVersion.min(HIVE_3)} holds. GENERIC uses the generic Parquet readers over
         * the task's split range. PIG is rejected.
         *
         * @param inputFile the file to read
         * @param task the file scan task supplying the split range and residual
         * @param readSchema the projection to read
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException for the PIG model, or for HIVE below Hive 3
         */
        private CloseableIterable<T> newParquetIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            // Computed once and shared by both the HIVE and GENERIC paths.
            Map<Integer, ?> idToConstant = this.constantsMap(task, IdentityPartitionConverters::convertConstant);
            CloseableIterable<T> parquetIterator = null;
            switch (this.inMemoryDataModel) {
                case PIG: {
                    throw new UnsupportedOperationException("Parquet support not yet supported for Pig");
                }
                case HIVE: {
                    if (HiveVersion.min(HiveVersion.HIVE_3)) {
                        parquetIterator = HIVE_VECTORIZED_READER_BUILDER.invoke(new Object[]{inputFile, task, idToConstant, this.context});
                        break;
                    }
                    throw new UnsupportedOperationException("Vectorized read is unsupported for Hive 2 integration.");
                }
                case GENERIC: {
                    Parquet.ReadBuilder parquetReadBuilder = Parquet.read(inputFile).project(readSchema).filter(task.residual()).caseSensitive(this.caseSensitive).split(task.start(), task.length());
                    if (this.reuseContainers) {
                        parquetReadBuilder.reuseContainers();
                    }
                    if (this.nameMapping != null) {
                        parquetReadBuilder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
                    }
                    // Reuses the map built above instead of recomputing it when the reader is created.
                    parquetReadBuilder.createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(readSchema, fileSchema, idToConstant));
                    parquetIterator = parquetReadBuilder.build();
                    break;
                }
                default: {
                    break;
                }
            }
            return this.applyResidualFiltering(parquetIterator, task.residual(), readSchema);
        }

        /**
         * Builds an ORC reader for {@code task} and applies residual filtering to its records.
         *
         * <p>Constant (identity partition) and metadata columns are removed from the schema
         * projected onto the file; the reader function still receives the full
         * {@code readSchema} together with the constants map.
         *
         * @param inputFile the file to read
         * @param task the file scan task supplying the split range and residual
         * @param readSchema the projection to read
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException for the PIG model, or for HIVE below Hive 3
         */
        private CloseableIterable<T> newOrcIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            Map<Integer, ?> idToConstant = this.constantsMap(task, IdentityPartitionConverters::convertConstant);
            Set<Integer> excludedIds = Sets.union(idToConstant.keySet(), MetadataColumns.metadataFieldIds());
            Schema fileProjection = TypeUtil.selectNot(readSchema, excludedIds);
            CloseableIterable<T> orcRecords = null;
            switch (this.inMemoryDataModel) {
                case PIG: {
                    throw new UnsupportedOperationException("ORC support not yet supported for Pig");
                }
                case HIVE: {
                    if (!HiveVersion.min(HiveVersion.HIVE_3)) {
                        throw new UnsupportedOperationException("Vectorized read is unsupported for Hive 2 integration.");
                    }
                    orcRecords = HIVE_VECTORIZED_READER_BUILDER.invoke(new Object[]{inputFile, task, idToConstant, this.context});
                    break;
                }
                case GENERIC: {
                    ORC.ReadBuilder builder = ORC.read(inputFile).project(fileProjection).filter(task.residual()).caseSensitive(this.caseSensitive).split(task.start(), task.length());
                    builder.createReaderFunc(fileSchema -> GenericOrcReader.buildReader(readSchema, fileSchema, idToConstant));
                    if (this.nameMapping != null) {
                        builder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
                    }
                    orcRecords = builder.build();
                    break;
                }
                default: {
                    break;
                }
            }
            return this.applyResidualFiltering(orcRecords, task.residual(), readSchema);
        }

        /**
         * Returns the constants map for {@code task}, but only when the expected schema
         * projects at least one identity partition source column of the task's spec.
         *
         * @param task the file scan task
         * @param converter conversion applied by {@link PartitionUtil#constantsMap}
         * @return the constants map, or an empty map when no identity partition column is projected
         */
        private Map<Integer, ?> constantsMap(FileScanTask task, BiFunction<Type, Object, Object> converter) {
            Set<Integer> identityIds = task.spec().identitySourceIds();
            Schema projectedPartitionColumns = TypeUtil.select(this.expectedSchema, identityIds);
            if (projectedPartitionColumns.columns().isEmpty()) {
                return Collections.emptyMap();
            }
            return PartitionUtil.constantsMap(task, converter);
        }

        /**
         * Resolves the schema to read. An explicit read schema in the configuration wins;
         * otherwise the selected columns are projected from the table schema; with neither
         * set, the full table schema is used.
         *
         * @param conf configuration holding the read schema and selected columns
         * @param tableSchema the table schema to project from
         * @param caseSensitive whether selected column names are matched case-sensitively
         * @return the schema to read
         */
        private static Schema readSchema(Configuration conf, Schema tableSchema, boolean caseSensitive) {
            Schema explicitSchema = InputFormatConfig.readSchema(conf);
            if (explicitSchema != null) {
                return explicitSchema;
            }
            String[] columns = InputFormatConfig.selectedColumns(conf);
            if (columns == null) {
                return tableSchema;
            }
            if (caseSensitive) {
                return tableSchema.select(columns);
            }
            return tableSchema.caseInsensitiveSelect(columns);
        }
    }
}

