/*
 * Decompiled with CFR 0.152.
 */
package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.jspecify.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.ParagraphManager;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

public class ParagraphPdfDocumentReader
implements DocumentReader {
    private static final String METADATA_START_PAGE = "page_number";
    private static final String METADATA_END_PAGE = "end_page_number";
    private static final String METADATA_TITLE = "title";
    private static final String METADATA_LEVEL = "level";
    private static final String METADATA_FILE_NAME = "file_name";
    protected final PDDocument document;
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    private final ParagraphManager paragraphTextExtractor;
    protected @Nullable String resourceFileName;
    private PdfDocumentReaderConfig config;

    public ParagraphPdfDocumentReader(String resourceUrl) {
        this(new DefaultResourceLoader().getResource(resourceUrl));
    }

    public ParagraphPdfDocumentReader(Resource pdfResource) {
        this(pdfResource, PdfDocumentReaderConfig.defaultConfig());
    }

    public ParagraphPdfDocumentReader(String resourceUrl, PdfDocumentReaderConfig config) {
        this(new DefaultResourceLoader().getResource(resourceUrl), config);
    }

    public ParagraphPdfDocumentReader(Resource pdfResource, PdfDocumentReaderConfig config) {
        try {
            PDFParser pdfParser = new PDFParser((RandomAccessRead)new RandomAccessReadBuffer(pdfResource.getInputStream()));
            this.document = pdfParser.parse();
            this.config = config;
            this.paragraphTextExtractor = new ParagraphManager(this.document);
            this.resourceFileName = pdfResource.getFilename();
        }
        catch (IllegalArgumentException iae) {
            throw iae;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public List<Document> get() {
        List<ParagraphManager.Paragraph> paragraphs = this.paragraphTextExtractor.flatten();
        ArrayList<Document> documents = new ArrayList<Document>();
        if (CollectionUtils.isEmpty(paragraphs)) {
            return documents;
        }
        this.logger.info("Start processing paragraphs from PDF");
        for (int i = 0; i < paragraphs.size(); ++i) {
            ParagraphManager.Paragraph to;
            ParagraphManager.Paragraph from = paragraphs.get(i);
            Document document = this.toDocument(from, to = i + 1 < paragraphs.size() ? paragraphs.get(i + 1) : from);
            if (document == null || !StringUtils.hasText((String)document.getText())) continue;
            documents.add(document);
        }
        this.logger.info("End processing paragraphs from PDF");
        return documents;
    }

    protected @Nullable Document toDocument(ParagraphManager.Paragraph from, ParagraphManager.Paragraph to) {
        String docText = this.getTextBetweenParagraphs(from, to);
        if (!StringUtils.hasText((String)docText)) {
            return null;
        }
        Document document = new Document(docText);
        this.addMetadata(from, to, document);
        return document;
    }

    protected void addMetadata(ParagraphManager.Paragraph from, ParagraphManager.Paragraph to, Document document) {
        document.getMetadata().put(METADATA_TITLE, from.title());
        document.getMetadata().put(METADATA_START_PAGE, from.startPageNumber());
        document.getMetadata().put(METADATA_END_PAGE, from.endPageNumber());
        document.getMetadata().put(METADATA_LEVEL, from.level());
        if (this.resourceFileName != null) {
            document.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
        }
    }

    public String getTextBetweenParagraphs(ParagraphManager.Paragraph fromParagraph, ParagraphManager.Paragraph toParagraph) {
        if (fromParagraph.startPageNumber() < 1) {
            this.logger.warn("Skipping paragraph titled '{}' because it has an invalid start page number: {}", (Object)fromParagraph.title(), (Object)fromParagraph.startPageNumber());
            return "";
        }
        int startPage = fromParagraph.startPageNumber() - 1;
        int endPage = toParagraph.startPageNumber() - 1;
        if (fromParagraph == toParagraph || endPage < startPage) {
            endPage = startPage;
        }
        try {
            StringBuilder sb = new StringBuilder();
            PDFLayoutTextStripperByArea pdfTextStripper = new PDFLayoutTextStripperByArea();
            pdfTextStripper.setSortByPosition(true);
            for (int pageNumber = startPage; pageNumber <= endPage; ++pageNumber) {
                int h;
                int y;
                PDPage page = this.document.getPage(pageNumber);
                float pageHeight = page.getMediaBox().getHeight();
                int fromPos = fromParagraph.position();
                int toPos = fromParagraph != toParagraph ? toParagraph.position() : 0;
                int x = (int)page.getMediaBox().getLowerLeftX();
                int w = (int)page.getMediaBox().getWidth();
                if (pageNumber == startPage && pageNumber == endPage) {
                    y = toPos;
                    h = fromPos - toPos;
                } else if (pageNumber == startPage) {
                    y = 0;
                    h = fromPos;
                } else if (pageNumber == endPage) {
                    y = toPos;
                    h = (int)pageHeight - toPos;
                } else {
                    y = 0;
                    h = (int)pageHeight;
                }
                if (h < 0) {
                    h = 0;
                }
                pdfTextStripper.addRegion("pdfPageRegion", new Rectangle(x, y, w, h));
                pdfTextStripper.extractRegions(page);
                String text = pdfTextStripper.getTextForRegion("pdfPageRegion");
                if (StringUtils.hasText((String)text)) {
                    sb.append(text);
                }
                pdfTextStripper.removeRegion("pdfPageRegion");
            }
            String text = sb.toString();
            if (StringUtils.hasText((String)text)) {
                text = this.config.pageExtractedTextFormatter.format(text, startPage);
            }
            return text;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

