package it.unimi.dsi.big.mg4j.document;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.big.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.big.mg4j.util.MG4JClassParser;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;
import it.unimi.dsi.io.MultipleInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;

/* loaded from: input_file:it/unimi/dsi/big/mg4j/document/CSVDocumentCollection.class */
/**
 * A document sequence that exposes each line of a character-separated file as a document.
 *
 * <p>Every line is split using {@link String#split(String)} with the configured separator
 * (a regular expression); each resulting field becomes one input stream, and the streams are
 * combined and handed to the underlying {@link DocumentFactory}.
 *
 * <p>The source file is opened once, at construction or deserialisation time, and all
 * iterators returned by {@link #iterator()} read from that single shared reader: obtaining a
 * new iterator does <em>not</em> restart from the beginning of the file.
 *
 * <p>Note that the file is decoded, and the fields re-encoded, using the platform default
 * charset.
 */
public class CSVDocumentCollection extends AbstractDocumentSequence implements Serializable {
    private static final long serialVersionUID = 1;

    /** The name of the source file. */
    private final String fileName;
    /** The regular expression used to split each line into fields. */
    private final String separator;
    /** The column names; the array length is the number of fields expected on each line. */
    private final String[] column;
    /** The index of the field used as title, or a negative value to use the line number. */
    private final int titleColumn;
    /** The factory turning the combined field streams into documents. */
    private final DocumentFactory factory;
    /** The reader over the source file, shared by all iterators. */
    private transient BufferedReader reader;
    /** The zero-based index of the last line read, or -1 if no line has been read yet. */
    private transient int readLines;

    /**
     * Creates a new collection and opens the source file.
     *
     * @param fileName the name of the source file.
     * @param separator the regular expression used to split lines into fields.
     * @param column the column names.
     * @param titleColumn the index of the column used as document title; if negative, the
     * line number is used as title instead.
     * @param factory the factory used to build documents.
     * @throws IllegalArgumentException if {@code titleColumn} is not smaller than the number of columns.
     * @throws FileNotFoundException if the source file cannot be opened.
     */
    public CSVDocumentCollection(String fileName, String separator, String[] column, int titleColumn, DocumentFactory factory) throws FileNotFoundException {
        if (titleColumn >= column.length) {
            throw new IllegalArgumentException("The title column (" + titleColumn + ") is larger than or equal to the number of columns (" + column.length + ")");
        }
        this.fileName = fileName;
        this.separator = separator;
        this.column = column;
        this.titleColumn = titleColumn;
        this.factory = factory;
        openReader();
    }

    /** Opens the source file and resets the line counter. */
    private void openReader() throws FileNotFoundException {
        this.reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
        this.readLines = -1;
    }

    /** Restores the serialised fields and reopens the source file, as the reader is transient. */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        openReader();
    }

    /**
     * Returns an iterator that builds one document per line read from the shared reader.
     *
     * @return a document iterator whose {@code nextDocument()} returns {@code null} at end of file.
     */
    @Override
    public DocumentIterator iterator() {
        return new AbstractDocumentIterator() {
            /** Metadata map, reused (and overwritten) for every returned document. */
            final Reference2ObjectArrayMap<Enum<?>, Object> metadata = new Reference2ObjectArrayMap<>(2);

            @Override
            public Document nextDocument() throws IOException {
                final String line = reader.readLine();
                if (line == null) {
                    return null;
                }
                // Lines are numbered from zero: the counter starts at -1.
                readLines++;

                final String[] field = line.split(separator);
                if (field.length != column.length) {
                    throw new IOException("Line " + readLines + " has a different number of fields (" + field.length + ") than the number of columns (" + column.length + ").");
                }

                final InputStream[] fieldStream = new InputStream[column.length];
                for (int i = 0; i < column.length; i++) {
                    fieldStream[i] = new ByteArrayInputStream(field[i].getBytes());
                }

                final String lineNumber = Integer.toString(readLines);
                this.metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, titleColumn >= 0 ? field[titleColumn] : lineNumber);
                this.metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI, lineNumber);
                return factory.getDocument(MultipleInputStream.getStream(fieldStream), this.metadata);
            }
        };
    }

    /**
     * Returns the factory used to build documents.
     *
     * @return the document factory passed at construction time.
     */
    @Override
    public DocumentFactory factory() {
        return this.factory;
    }

    /**
     * Closes this sequence and the underlying file reader.
     *
     * @throws IOException if closing the superclass or the reader fails.
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            // Release the file handle even if the superclass fails to close.
            this.reader.close();
        }
    }

    /**
     * Command-line entry point: builds a collection from the given options and stores it in
     * serialised form.
     *
     * <p>A separator given as the two characters {@code \t} is replaced by an actual tab. One
     * factory instance is created per column, and the instances are combined into a composite
     * factory using the column names.
     *
     * @param strArr the command-line arguments.
     */
    public static void main(String[] strArr) throws JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, IOException, InstantiationException {
        final SimpleJSAP jsap = new SimpleJSAP(
            CSVDocumentCollection.class.getName(),
            "Saves a serialised document collection based on the lines of a CSV file.",
            new Parameter[]{
                new FlaggedOption("separator", JSAP.STRING_PARSER, ",", false, 's', "separator", "The regexp used to split lines into fields."),
                new FlaggedOption("factory", MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."),
                new FlaggedOption("property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true),
                new FlaggedOption("titleColumn", JSAP.INTEGER_PARSER, "-1", false, 't', "title-column", "The index of the column to be used as a title (starting from 0)."),
                new UnflaggedOption("collection", JSAP.STRING_PARSER, true, "The filename for the serialised collection."),
                new UnflaggedOption("fileName", JSAP.STRING_PARSER, true, "The filename of the source CSV file."),
                new UnflaggedOption("column", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, true, "Columns names that will be indexed.")
            });

        final JSAPResult jsapResult = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }

        final int titleColumn = jsapResult.getInt("titleColumn");
        final String collectionName = jsapResult.getString("collection");
        final String fileName = jsapResult.getString("fileName");
        final String rawSeparator = jsapResult.getString("separator");
        // Allow a literal backslash-t on the command line to stand for a tab.
        final String separator = rawSeparator.equals("\\t") ? "\t" : rawSeparator;
        final String[] column = jsapResult.getStringArray("column");

        final DocumentFactory[] columnFactory = new DocumentFactory[column.length];
        for (int i = 0; i < columnFactory.length; i++) {
            columnFactory[i] = PropertyBasedDocumentFactory.getInstance((Class<?>) jsapResult.getClass("factory"), jsapResult.getStringArray("property"));
        }

        BinIO.storeObject(new CSVDocumentCollection(fileName, separator, column, titleColumn, CompositeDocumentFactory.getFactory(columnFactory, column)), collectionName);
    }
}
