/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pdfbox.examples.util;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Text stripper that locates search patterns in the extracted text of a PDF and
 * reports each hit as a {@code <loc pg=... pos=... len=...>} entry inside an
 * {@code <XML><Body><Highlight>} document.
 *
 * <p>The extracted text of each page is buffered in memory, matched against the
 * search patterns when the page ends, and then discarded, so reported positions
 * are offsets into the text of a single page.
 *
 * <p>Instances keep per-run state in fields, so one instance must not be used
 * for two overlapping {@code generateXMLHighlight} calls.
 */
public class PDFHighlighter
extends PDFTextStripper {
    /** Charset used both to write the page text into the buffer and to read it back. */
    private static final String ENCODING = "UTF-16";

    /** Destination of the highlight XML for the run in progress. */
    private Writer highlighterOutput = null;
    /** Search patterns for the run in progress, compiled once per run. */
    private Pattern[] searchedPatterns;
    /** In-memory buffer receiving the text of the page currently being extracted. */
    private ByteArrayOutputStream textOS = null;
    /** Writer handed to the text extraction; encodes into {@link #textOS}. */
    private Writer textWriter = null;

    /**
     * Creates a highlighter whose extracted text has empty line and word
     * separators, is not separated by beads and keeps duplicate overlapping text.
     *
     * @throws IOException if the underlying text stripper cannot be initialised
     */
    public PDFHighlighter() throws IOException {
        super.setLineSeparator("");
        super.setWordSeparator("");
        super.setShouldSeparateByBeads(false);
        super.setSuppressDuplicateOverlappingText(false);
    }

    /**
     * Generates the highlight XML for a single search pattern.
     *
     * @param pdDocument    the document to search
     * @param highlightWord the pattern to locate; interpreted as a case-insensitive regular expression
     * @param xmlOutput     the writer receiving the highlight XML; it is flushed but not closed
     * @throws IOException if extracting the text or writing the output fails
     */
    public void generateXMLHighlight(PDDocument pdDocument, String highlightWord, Writer xmlOutput) throws IOException {
        this.generateXMLHighlight(pdDocument, new String[]{highlightWord}, xmlOutput);
    }

    /**
     * Generates the highlight XML for several search patterns.
     *
     * <p>All patterns are compiled before anything is written, so a malformed
     * pattern is rejected without leaving a partially written document behind.
     *
     * @param pdDocument the document to search
     * @param sWords     the patterns to locate; each is interpreted as a case-insensitive regular expression
     * @param xmlOutput  the writer receiving the highlight XML; it is flushed but not closed
     * @throws IOException if extracting the text or writing the output fails
     * @throws java.util.regex.PatternSyntaxException if one of the patterns is not a valid regular expression
     */
    public void generateXMLHighlight(PDDocument pdDocument, String[] sWords, Writer xmlOutput) throws IOException {
        this.searchedPatterns = PDFHighlighter.compilePatterns(sWords);
        this.highlighterOutput = xmlOutput;
        this.highlighterOutput.write("<XML>\n<Body units=characters  version=2>\n<Highlight>\n");
        this.textOS = new ByteArrayOutputStream();
        this.textWriter = new OutputStreamWriter(this.textOS, ENCODING);
        // The extracted text goes to the in-memory buffer; the <loc> entries are
        // written to highlighterOutput from endPage as each page is finished.
        this.writeText(pdDocument, this.textWriter);
        this.highlighterOutput.write("</Highlight>\n</Body>\n</XML>");
        this.highlighterOutput.flush();
    }

    /**
     * Matches the buffered text of the finished page against every search
     * pattern and writes one {@code <loc>} entry per match.
     *
     * <p>NOTE(review): presumably called by the superclass once per page while
     * {@code writeText} runs - confirm against the PDFTextStripper API.
     *
     * @param pdPage the page that has just been processed (not used here)
     * @throws IOException if flushing the page buffer or writing an entry fails
     */
    protected void endPage(PDPage pdPage) throws IOException {
        this.textWriter.flush();
        String page = new String(this.textOS.toByteArray(), ENCODING);
        // Start the next page with an empty buffer so positions stay page-relative.
        this.textOS.reset();
        // Every "a" followed by one to three digits is collapsed to a single ".".
        // The indexOf guard skips the regex work for pages containing no 'a' at all.
        if (page.indexOf('a') != -1) {
            page = page.replaceAll("a[0-9]{1,3}", ".");
        }
        int pageIndex = this.getCurrentPageNo() - 1;
        for (Pattern pattern : this.searchedPatterns) {
            Matcher matcher = pattern.matcher(page);
            while (matcher.find()) {
                int begin = matcher.start();
                int end = matcher.end();
                this.highlighterOutput.write("    <loc pg=" + pageIndex + " pos=" + begin + " len=" + (end - begin) + ">\n");
            }
        }
    }

    /**
     * Command line entry point: writes the highlight XML for the given PDF file
     * and search patterns to standard output.
     *
     * @param args the PDF file name followed by at least one search pattern
     * @throws IOException if the document cannot be loaded, read or closed
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            // Terminates the JVM, so nothing below runs with too few arguments.
            PDFHighlighter.usage();
        }
        PDFHighlighter xmlExtractor = new PDFHighlighter();
        PDDocument doc = null;
        try {
            // Everything after the file name is a search pattern.
            String[] highlightStrings = new String[args.length - 1];
            System.arraycopy(args, 1, highlightStrings, 0, highlightStrings.length);
            doc = PDDocument.load(new File(args[0]));
            xmlExtractor.generateXMLHighlight(doc, highlightStrings, new OutputStreamWriter(System.out));
        }
        finally {
            if (doc != null) {
                doc.close();
            }
        }
    }

    /** Compiles each search word into a case-insensitive pattern, preserving order. */
    private static Pattern[] compilePatterns(String[] words) {
        Pattern[] patterns = new Pattern[words.length];
        for (int i = 0; i < words.length; ++i) {
            patterns[i] = Pattern.compile(words[i], Pattern.CASE_INSENSITIVE);
        }
        return patterns;
    }

    /** Prints the command line synopsis to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("usage: java " + PDFHighlighter.class.getName() + " <pdf file> word1 word2 word3 ...");
        System.exit(1);
    }
}

