/*
 * Decompiled with CFR 0.152.
 */
package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.jspecify.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/**
 * Reads a PDF and groups the text of its pages into {@link Document} instances.
 *
 * <p>The PDF is parsed once, in the constructor. Each call to {@link #get()} walks
 * the page tree, extracts the text of every page from a rectangular region derived
 * from the page's media box and the configured top/bottom margins, and concatenates
 * the text of {@code pagesPerDocument} consecutive pages into one {@link Document}.
 *
 * <p>NOTE(review): the parsed {@link PDDocument} is held for the lifetime of this
 * reader and is never closed by this class; confirm whether callers or subclasses
 * are expected to release it.
 */
public class PagePdfDocumentReader
implements DocumentReader {
    /** Metadata key holding the number of the first page contributing to a document. */
    public static final String METADATA_START_PAGE_NUMBER = "page_number";
    /** Metadata key holding the last page number; only set when a document spans several pages. */
    public static final String METADATA_END_PAGE_NUMBER = "end_page_number";
    /** Metadata key holding the file name of the source resource, when one is available. */
    public static final String METADATA_FILE_NAME = "file_name";
    /** Name under which the per-page extraction rectangle is registered on the text stripper. */
    private static final String PDF_PAGE_REGION = "pdfPageRegion";
    /** The parsed PDF; assigned once in the constructor. */
    protected final PDDocument document;
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    /** File name reported by the source resource; may be {@code null}. */
    protected @Nullable String resourceFileName;
    /** Margins, grouping size and text formatter applied while reading. */
    private final PdfDocumentReaderConfig config;

    /**
     * Creates a reader for the resource at the given location using the default configuration.
     *
     * @param resourceUrl location resolved through a {@link DefaultResourceLoader}
     * @throws RuntimeException if the resource cannot be read or parsed
     */
    public PagePdfDocumentReader(String resourceUrl) {
        this(new DefaultResourceLoader().getResource(resourceUrl));
    }

    /**
     * Creates a reader for the given resource using the default configuration.
     *
     * @param pdfResource the PDF to read
     * @throws RuntimeException if the resource cannot be read or parsed
     */
    public PagePdfDocumentReader(Resource pdfResource) {
        this(pdfResource, PdfDocumentReaderConfig.defaultConfig());
    }

    /**
     * Creates a reader for the resource at the given location.
     *
     * @param resourceUrl location resolved through a {@link DefaultResourceLoader}
     * @param config reading configuration
     * @throws RuntimeException if the resource cannot be read or parsed
     */
    public PagePdfDocumentReader(String resourceUrl, PdfDocumentReaderConfig config) {
        this(new DefaultResourceLoader().getResource(resourceUrl), config);
    }

    /**
     * Creates a reader for the given resource and parses the PDF immediately.
     *
     * @param pdfResource the PDF to read
     * @param config reading configuration
     * @throws RuntimeException wrapping any exception raised while opening, reading
     *         or parsing the resource
     */
    public PagePdfDocumentReader(Resource pdfResource, PdfDocumentReaderConfig config) {
        // try-with-resources guarantees the resource stream is released on every path,
        // including when reading or parsing fails part-way through.
        try (InputStream pdfStream = pdfResource.getInputStream()) {
            PDFParser pdfParser = new PDFParser(new RandomAccessReadBuffer(pdfStream));
            this.document = pdfParser.parse();
            this.resourceFileName = pdfResource.getFilename();
            this.config = config;
        }
        catch (Exception e) {
            // Deliberately broad: callers have always received a RuntimeException
            // wrapping whatever went wrong during construction.
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the text of every page and groups it into documents.
     *
     * <p>A document is emitted after every {@code pagesPerDocument} pages and after
     * the last page. A group in which no page yielded text produces no document.
     * Progress is logged at INFO level roughly every tenth of the page count.
     *
     * @return the documents in page order; empty when no page contains text
     * @throws RuntimeException wrapping any {@link IOException} raised during extraction
     */
    public List<Document> get() {
        List<Document> readDocuments = new ArrayList<>();
        try {
            PDFLayoutTextStripperByArea pdfTextStripper = new PDFLayoutTextStripperByArea();
            PDPageTree pages = this.document.getDocumentCatalog().getPages();
            int totalPages = pages.getCount();
            // Log about ten progress lines for large files, every page for small ones.
            int logFrequency = totalPages > 10 ? totalPages / 10 : 1;
            int pagesPerDocument = this.getPagesPerDocument(totalPages);

            List<String> pageTextGroupList = new ArrayList<>();
            int pageNumber = 1;
            int startPageNumber = 1;
            for (PDPage page : pages) {
                if ((pageNumber - 1) % logFrequency == 0) {
                    this.logger.info("Processing PDF page: {}", pageNumber);
                }
                this.handleSinglePage(page, pageNumber, pdfTextStripper, pageTextGroupList);

                boolean groupComplete = pageNumber % pagesPerDocument == 0 || pageNumber == totalPages;
                if (groupComplete) {
                    if (!pageTextGroupList.isEmpty()) {
                        String groupText = String.join("", pageTextGroupList);
                        readDocuments.add(this.toDocument(groupText, startPageNumber, pageNumber));
                        pageTextGroupList.clear();
                    }
                    // The next group starts on the following page even if this one was empty.
                    startPageNumber = pageNumber + 1;
                }
                ++pageNumber;
            }
            this.logger.info("Processed total {} pages", totalPages);
            return readDocuments;
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the text of one page and, if it contains any text, appends the
     * formatted result to the current group.
     *
     * @param page the page to extract
     * @param pageNumber 1-based number of the page, passed to the text formatter
     * @param pdfTextStripper stripper shared across all pages of one {@link #get()} call
     * @param pageTextGroupList accumulator for the text of the current page group
     * @throws IOException if text extraction fails
     */
    private void handleSinglePage(PDPage page, int pageNumber, PDFLayoutTextStripperByArea pdfTextStripper, List<String> pageTextGroupList) throws IOException {
        // Extraction rectangle: the media box shrunk by the configured top and bottom margins.
        int x0 = (int)page.getMediaBox().getLowerLeftX();
        int xW = (int)page.getMediaBox().getWidth();
        int y0 = (int)page.getMediaBox().getLowerLeftY() + this.config.pageTopMargin;
        int yW = (int)page.getMediaBox().getHeight() - (this.config.pageTopMargin + this.config.pageBottomMargin);
        pdfTextStripper.addRegion(PDF_PAGE_REGION, new Rectangle(x0, y0, xW, yW));
        try {
            pdfTextStripper.extractRegions(page);
            String pageText = pdfTextStripper.getTextForRegion(PDF_PAGE_REGION);
            if (StringUtils.hasText(pageText)) {
                pageTextGroupList.add(this.config.pageExtractedTextFormatter.format(pageText, pageNumber));
            }
        }
        finally {
            // Always unregister the region so a failure on this page does not leave
            // a stale rectangle on the shared stripper.
            pdfTextStripper.removeRegion(PDF_PAGE_REGION);
        }
    }

    /**
     * Resolves the number of pages grouped into one document.
     *
     * @param totalPages page count of the PDF
     * @return the configured group size, or {@code totalPages} when the configured
     *         value is {@code 0} (all pages go into a single document)
     */
    private int getPagesPerDocument(int totalPages) {
        int configured = this.config.pagesPerDocument;
        return configured == 0 ? totalPages : configured;
    }

    /**
     * Builds a {@link Document} from the given text and attaches page and file metadata.
     *
     * @param docText concatenated text of the page group
     * @param startPageNumber first page of the group, stored under {@link #METADATA_START_PAGE_NUMBER}
     * @param endPageNumber last page of the group, stored under {@link #METADATA_END_PAGE_NUMBER}
     *        only when it differs from {@code startPageNumber}
     * @return the populated document
     */
    protected Document toDocument(String docText, int startPageNumber, int endPageNumber) {
        Document doc = new Document(docText);
        doc.getMetadata().put(METADATA_START_PAGE_NUMBER, startPageNumber);
        if (startPageNumber != endPageNumber) {
            doc.getMetadata().put(METADATA_END_PAGE_NUMBER, endPageNumber);
        }
        if (this.resourceFileName != null) {
            doc.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
        }
        return doc;
    }
}

