/*
 * Decompiled with CFR 0.152.
 */
package org.openpdf.text.pdf.parser;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.openpdf.text.ExceptionConverter;
import org.openpdf.text.pdf.PRIndirectReference;
import org.openpdf.text.pdf.PRStream;
import org.openpdf.text.pdf.PRTokeniser;
import org.openpdf.text.pdf.PdfArray;
import org.openpdf.text.pdf.PdfContentParser;
import org.openpdf.text.pdf.PdfDictionary;
import org.openpdf.text.pdf.PdfLiteral;
import org.openpdf.text.pdf.PdfName;
import org.openpdf.text.pdf.PdfObject;
import org.openpdf.text.pdf.PdfReader;
import org.openpdf.text.pdf.RandomAccessFileOrArray;
import org.openpdf.text.pdf.parser.MarkedUpTextAssembler;
import org.openpdf.text.pdf.parser.PdfContentStreamHandler;
import org.openpdf.text.pdf.parser.TextAssembler;

/**
 * Extracts the text of individual pages of a PDF document.
 *
 * <p>The page's content stream bytes are collected, parsed into operators and operands,
 * and every operator is dispatched to a {@link PdfContentStreamHandler} that is backed by
 * the {@link TextAssembler} supplied at construction time.
 */
public class PdfTextExtractor {
    /** Document the pages are read from. */
    private final PdfReader reader;
    /** Assembler that is reset, told the page number, and handed to the handler for each page. */
    private final TextAssembler renderListener;

    /**
     * Creates an extractor that assembles text with a {@link MarkedUpTextAssembler}
     * built from the reader alone.
     *
     * @param reader the document to extract text from
     */
    public PdfTextExtractor(PdfReader reader) {
        this(reader, new MarkedUpTextAssembler(reader));
    }

    /**
     * Creates an extractor that assembles text with a {@link MarkedUpTextAssembler}.
     *
     * @param reader               the document to extract text from
     * @param usePdfMarkupElements forwarded unchanged to the {@link MarkedUpTextAssembler} constructor
     */
    public PdfTextExtractor(PdfReader reader, boolean usePdfMarkupElements) {
        this(reader, new MarkedUpTextAssembler(reader, usePdfMarkupElements));
    }

    /**
     * Creates an extractor that uses a caller-supplied assembler.
     *
     * @param reader         the document to extract text from
     * @param renderListener the assembler that receives the text of each processed page
     */
    public PdfTextExtractor(PdfReader reader, TextAssembler renderListener) {
        this.reader = reader;
        this.renderListener = renderListener;
    }

    /**
     * Collects the raw content bytes of a page.
     *
     * @param pageNum the page number passed to {@link PdfReader#getPageN(int)}
     * @return the page's content bytes; empty when the page has no {@code /Contents} entry
     * @throws IOException if the content cannot be read
     */
    private byte[] getContentBytesForPage(int pageNum) throws IOException {
        // The handle obtained from the reader is held only so that it is closed when the
        // block exits; it is not read from directly here.
        try (RandomAccessFileOrArray ignored = reader.getSafeFile()) {
            PdfDictionary pageDictionary = reader.getPageN(pageNum);
            return getContentBytesFromContentObject(pageDictionary.get(PdfName.CONTENTS));
        }
    }

    /**
     * Resolves a content object to its bytes. Indirect references are followed, streams are
     * read, and arrays are flattened by concatenating the bytes of their elements in order.
     *
     * @param contentObject the object to resolve; may be {@code null}
     * @return the resolved bytes; an empty array when {@code contentObject} is {@code null}
     * @throws IOException           if stream bytes cannot be read
     * @throws IllegalStateException if the object is of a type this method does not handle
     */
    private byte[] getContentBytesFromContentObject(PdfObject contentObject) throws IOException {
        if (contentObject == null) {
            // A missing /Contents entry, an unresolvable reference or a null array element
            // contributes no content instead of failing with a NullPointerException.
            return new byte[0];
        }
        return switch (contentObject.type()) {
            // Type code 10: handled as an indirect reference that is resolved and re-examined.
            case 10 -> {
                PRIndirectReference ref = (PRIndirectReference) contentObject;
                yield getContentBytesFromContentObject(PdfReader.getPdfObject(ref));
            }
            // Type code 7: handled as a stream whose bytes are read through the reader.
            case 7 -> {
                PRStream stream = (PRStream) PdfReader.getPdfObject(contentObject);
                yield PdfReader.getStreamBytes(stream);
            }
            // Type code 5: handled as an array; every element is resolved recursively.
            case 5 -> {
                ByteArrayOutputStream allBytes = new ByteArrayOutputStream();
                for (PdfObject element : ((PdfArray) contentObject).getElements()) {
                    allBytes.write(getContentBytesFromContentObject(element));
                }
                yield allBytes.toByteArray();
            }
            default -> throw new IllegalStateException(
                    "Unable to handle Content of type " + contentObject.getClass());
        };
    }

    /**
     * Extracts the text of a page. Equivalent to {@code getTextFromPage(page, false)}.
     *
     * @param page the page number
     * @return the text reported by the content stream handler, or {@code ""} if the reader
     *         has no dictionary for the page
     * @throws IOException if the page content cannot be read
     */
    public String getTextFromPage(int page) throws IOException {
        return getTextFromPage(page, false);
    }

    /**
     * Extracts the text of a page.
     *
     * @param page               the page number
     * @param useContainerMarkup accepted for API compatibility; this method does not read it
     * @return the text reported by the content stream handler, or {@code ""} if the reader
     *         has no dictionary for the page
     * @throws IOException if the page content cannot be read
     */
    public String getTextFromPage(int page, boolean useContainerMarkup) throws IOException {
        PdfDictionary pageDict = reader.getPageN(page);
        if (pageDict == null) {
            return "";
        }
        PdfDictionary resources = pageDict.getAsDict(PdfName.RESOURCES);

        // The assembler is shared across calls, so it is reset before each page.
        renderListener.reset();
        renderListener.setPage(page);

        PdfContentStreamHandler handler = new PdfContentStreamHandler(renderListener);
        processContent(getContentBytesForPage(page), resources, handler);
        return handler.getResultantText();
    }

    /**
     * Parses content stream bytes and dispatches each parsed operator to the handler.
     *
     * <p>A context is pushed on the handler before parsing starts and is popped again when
     * this method exits, including when parsing or operator handling fails.
     *
     * @param contentBytes the content stream bytes to parse
     * @param resources    the resource dictionary passed along with every operator
     * @param handler      the handler that receives the operators
     * @throws ExceptionConverter wrapping any exception raised while parsing or dispatching
     */
    public void processContent(byte[] contentBytes, PdfDictionary resources, PdfContentStreamHandler handler) {
        handler.pushContext("div class='t-extracted-page'");
        try {
            PdfContentParser parser = new PdfContentParser(new PRTokeniser(contentBytes));
            ArrayList<PdfObject> operands = new ArrayList<>();
            // The same list is handed to every parse call; an empty result ends the loop.
            while (!parser.parse(operands).isEmpty()) {
                // The last parsed element is taken as the operator; the full list is passed on.
                PdfLiteral operator = (PdfLiteral) operands.get(operands.size() - 1);
                handler.invokeOperator(operator, operands, resources);
            }
        } catch (Exception e) {
            throw new ExceptionConverter(e);
        } finally {
            // Keep push/pop balanced even when the content stream is malformed.
            handler.popContext();
        }
    }
}

