package org.archive.hadoop;

import java.io.IOException;
import java.util.logging.Logger;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.archive.extract.ExtractingResourceFactoryMapper;
import org.archive.extract.ExtractingResourceProducer;
import org.archive.format.gzip.GZIPMemberSeries;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
import org.archive.resource.ResourceParseException;
import org.archive.resource.ResourceProducer;
import org.archive.resource.TransformingResourceProducer;
import org.archive.resource.arc.ARCResourceFactory;
import org.archive.resource.gzip.GZIPResourceContainer;
import org.archive.resource.warc.WARCResourceFactory;
import org.archive.streamcontext.HDFSStream;
import org.archive.streamcontext.Stream;
import org.archive.util.StreamCopy;

/* loaded from: input_file:WEB-INF/lib/ia-web-commons-1.0-SNAPSHOT.jar:org/archive/hadoop/ResourceRecordReader.class */
/**
 * Hadoop {@link RecordReader} that walks the gzip members of a single
 * {@code .warc.gz}, {@code .wat.gz} or {@code .arc.gz} file split and emits one
 * record per extracted resource.
 *
 * <p>The key is a {@link ResourceContext} built from the file name and the start
 * offset of the gzip member the resource was read from; the value is the top-level
 * {@link MetaData} of that resource.
 */
public class ResourceRecordReader extends RecordReader<ResourceContext, MetaData> {
    private static final Logger LOG = Logger.getLogger(ResourceRecordReader.class.getName());

    /** Factory used for {@code .warc.gz} / {@code .wat.gz} inputs. */
    WARCResourceFactory wf = new WARCResourceFactory();
    /** Factory used for {@code .arc.gz} inputs. */
    ARCResourceFactory af = new ARCResourceFactory();
    /** Stream over the input file; its offset drives {@link #getProgress()}. */
    Stream stream;
    /** Gzip member series read from {@link #stream}. */
    GZIPMemberSeries series;
    private ResourceProducer producer;
    private String name;
    private long startOffset;
    private long length;
    private ResourceContext cachedK;
    private MetaData cachedV;

    /**
     * Closes the underlying resource producer.
     *
     * <p>Safe to call when {@link #initialize} was never invoked or failed part-way:
     * in that case there is no producer and this method does nothing.
     *
     * @throws IOException if the producer fails to close
     */
    @Override
    public void close() throws IOException {
        if (this.producer != null) {
            this.producer.close();
        }
    }

    /**
     * @return the key cached by the last successful {@link #nextKeyValue()} call,
     *         or {@code null} if no record has been read yet
     */
    @Override
    public ResourceContext getCurrentKey() throws IOException, InterruptedException {
        return this.cachedK;
    }

    /**
     * @return the value cached by the last successful {@link #nextKeyValue()} call,
     *         or {@code null} if no record has been read yet
     */
    @Override
    public MetaData getCurrentValue() throws IOException, InterruptedException {
        return this.cachedV;
    }

    /**
     * Reports how far the stream offset has advanced through the split.
     *
     * @return a value in {@code [0.0, 1.0]}; {@code 0.0} for a zero-length split
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (this.length == 0) {
            return 0.0f;
        }
        float fraction = ((float) (this.stream.getOffset() - this.startOffset)) / ((float) this.length);
        // The stream offset can move past the split end (the final member is read
        // to its own end), so clamp to the range callers expect.
        return Math.max(0.0f, Math.min(1.0f, fraction));
    }

    /**
     * Opens the file behind the given split, seeks to the split start and builds the
     * producer chain used by {@link #nextKeyValue()}.
     *
     * @param inputSplit         must be a {@link FileSplit}
     * @param taskAttemptContext supplies the configuration used to resolve the file system
     * @throws IOException if the split is not a {@link FileSplit}, the file name does
     *                     not end in {@code .warc.gz}, {@code .wat.gz} or
     *                     {@code .arc.gz}, or the file cannot be opened or positioned
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        if (!(inputSplit instanceof FileSplit)) {
            throw new IOException("Need FileSplit input...");
        }
        FileSplit fileSplit = (FileSplit) inputSplit;
        Path path = fileSplit.getPath();
        String fileName = path.getName();

        // Validate the extension before opening anything so an unsupported file
        // never leaves an open HDFS stream behind.
        boolean warcLike = fileName.endsWith(".warc.gz") || fileName.endsWith(".wat.gz");
        if (!warcLike && !fileName.endsWith(".arc.gz")) {
            throw new IOException("arguments must be arc.gz or warc.gz");
        }

        FSDataInputStream in = path.getFileSystem(taskAttemptContext.getConfiguration()).open(path);
        boolean initialized = false;
        try {
            this.name = fileName;
            this.stream = new HDFSStream(in);
            this.startOffset = fileSplit.getStart();
            this.length = fileSplit.getLength();
            long endOffset = this.startOffset + this.length;
            this.stream.setOffset(this.startOffset);
            this.series = new GZIPMemberSeries(this.stream, this.name, this.startOffset);
            GZIPResourceContainer container = new GZIPResourceContainer(this.series, endOffset);

            TransformingResourceProducer transformer;
            if (warcLike) {
                transformer = new TransformingResourceProducer(container, this.wf);
            } else {
                transformer = new TransformingResourceProducer(container, this.af);
            }
            this.producer = new ExtractingResourceProducer(transformer, new ExtractingResourceFactoryMapper());
            initialized = true;
        } finally {
            if (!initialized) {
                // Setup failed after the file was opened: release the stream, but let
                // the original failure propagate rather than a secondary close error.
                try {
                    in.close();
                } catch (IOException ignored) {
                    // intentionally ignored; the setup exception is the useful one
                }
            }
        }
    }

    /**
     * Advances to the next resource, fully consumes its payload and caches the
     * key/value pair for {@link #getCurrentKey()} / {@link #getCurrentValue()}.
     *
     * @return {@code true} if a record was read, {@code false} when the producer
     *         has no more resources
     * @throws IOException on read failure, or wrapping a {@link ResourceParseException}
     *                     together with the file name and current member offset
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        try {
            Resource next = this.producer.getNext();
            if (next == null) {
                return false;
            }
            // Drain the resource before reading its metadata.
            StreamCopy.readToEOF(next.getInputStream());
            long memberOffset = this.series.getCurrentMemberStartOffset();
            LOG.info(String.format("Extracted offset %d\n", Long.valueOf(memberOffset)));
            this.cachedK = new ResourceContext(this.name, memberOffset);
            this.cachedV = next.getMetaData().getTopMetaData();
            return true;
        } catch (ResourceParseException e) {
            // The cause is attached to the rethrown exception, so no separate
            // stack-trace dump is needed here.
            throw new IOException(String.format("ResourceParseException at(%s)(%d)", this.name, Long.valueOf(this.series.getCurrentMemberStartOffset())), e);
        }
    }
}
