/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.sax.BoilerpipeSAXInput;
import de.l3s.boilerpipe.sax.HTMLDocument;
import de.l3s.boilerpipe.sax.HTMLFetcher;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * Re-serializes an HTML document while marking (or exclusively keeping) the
 * text that a {@link TextDocument} has flagged as content.
 *
 * <p>The original HTML is parsed again with a SAX parser. Every
 * {@code characters()} callback is numbered, and the number is looked up in
 * the union of the "contained text elements" bit sets of all content blocks
 * of the {@link TextDocument}. Matching text is wrapped in
 * {@link #getPreHighlight()} / {@link #getPostHighlight()}.
 *
 * <p>Instances are created through {@link #newHighlightingInstance()} or
 * {@link #newExtractingInstance()} and can be tuned through the setters.
 */
public final class HTMLHighlighter {
    /**
     * Optional whitelist: element name (as reported in {@code qName}) to the
     * set of attribute names that may be emitted for it. {@code null} means
     * "emit every element and attribute".
     */
    private Map<String, Set<String>> tagWhitelist = null;

    /** Matches an opening tag immediately followed by a closing tag, i.e. an element without text. */
    private static final Pattern PAT_TAG_NO_TEXT = Pattern.compile("<[^/][^>]*></[^>]*>");

    /** Matches output that consists of a single outer element wrapping further markup (group 1). */
    private static final Pattern PAT_SUPER_TAG = Pattern.compile("^<[^>]*>(<.*?>)</[^>]*>$");

    /** When {@code true}, text that is not flagged as content is dropped from the output. */
    private boolean outputHighlightOnly = false;

    /** Markup appended to the output just before the end of the {@code HEAD} element is handled. */
    private String extraStyleSheet = "\n<style type=\"text/css\">\n.x-boilerpipe-mark1 { text-decoration:none; background-color: #ffff42 !important; color: black !important; display:inline !important; visibility:visible !important; }\n</style>\n";

    /** Markup written directly before each piece of content text. */
    private String preHighlight = "<span class=\"x-boilerpipe-mark1\">";

    /** Markup written directly after each piece of content text. */
    private String postHighlight = "</span>";

    /** Action for elements whose markup and text are suppressed entirely. */
    private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {

        @Override
        void beforeStart(Implementation instance, String localName) {
            instance.inIgnorableElement++;
        }

        @Override
        void afterEnd(Implementation instance, String localName) {
            instance.inIgnorableElement--;
        }
    };

    /**
     * Action for {@code HEAD}: suppressed like an ignorable element, but the
     * configured extra style sheet is appended to the output before the end
     * tag is processed.
     */
    private static final TagAction TA_HEAD = new TagAction() {

        @Override
        void beforeStart(Implementation instance, String localName) {
            instance.inIgnorableElement++;
        }

        @Override
        void beforeEnd(Implementation instance, String localName) {
            instance.html.append(instance.hl.extraStyleSheet);
        }

        @Override
        void afterEnd(Implementation instance, String localName) {
            instance.inIgnorableElement--;
        }
    };

    /** Special handling per element, keyed by the element's local name (looked up case-sensitively). */
    private static final Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();

    static {
        TAG_ACTIONS.put("STYLE", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("SCRIPT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("OPTION", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("NOSCRIPT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("OBJECT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("EMBED", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("APPLET", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("LINK", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("HEAD", TA_HEAD);
    }

    /**
     * Creates an instance that keeps the whole document and wraps content
     * text in the default highlight markup.
     *
     * @return a new highlighter using the default settings
     */
    public static HTMLHighlighter newHighlightingInstance() {
        return new HTMLHighlighter(false);
    }

    /**
     * Creates an instance that outputs only the content text (with its
     * surrounding markup), without highlight wrappers.
     *
     * @return a new highlighter configured for extraction
     */
    public static HTMLHighlighter newExtractingInstance() {
        return new HTMLHighlighter(true);
    }

    /**
     * @param extractHTML {@code true} to configure the instance for
     *        extraction (highlight-only output, spacing style sheet, empty
     *        pre/post highlight markup); {@code false} to keep the defaults
     */
    private HTMLHighlighter(boolean extractHTML) {
        if (extractHTML) {
            this.setOutputHighlightOnly(true);
            this.setExtraStyleSheet("\n<style type=\"text/css\">\nA:before { content:' '; } \nA:after { content:' '; } \nSPAN:before { content:' '; } \nSPAN:after { content:' '; } \n</style>\n");
            this.setPreHighlight("");
            this.setPostHighlight("");
        }
    }

    /**
     * Processes HTML that is available as a string.
     *
     * @param doc the document whose content blocks decide what is highlighted
     * @param origHTML the original HTML text
     * @return the re-serialized HTML
     * @throws BoilerpipeProcessingException if parsing the HTML fails
     */
    public String process(TextDocument doc, String origHTML) throws BoilerpipeProcessingException {
        return this.process(doc, new InputSource(new StringReader(origHTML)));
    }

    /**
     * Processes HTML read from the given input source.
     *
     * <p>In highlight-only mode the result is post-processed repeatedly until
     * nothing changes: elements without text are removed, and an output that
     * is a single element wrapping further markup is replaced by that inner
     * markup.
     *
     * @param doc the document whose content blocks decide what is highlighted
     * @param is the source of the original HTML
     * @return the re-serialized HTML
     * @throws BoilerpipeProcessingException if parsing the HTML fails
     */
    public String process(TextDocument doc, InputSource is) throws BoilerpipeProcessingException {
        Implementation implementation = new Implementation();
        implementation.process(doc, is);
        String html = implementation.html.toString();
        if (!this.outputHighlightOnly) {
            return html;
        }

        boolean changed;
        do {
            changed = false;

            Matcher m = PAT_TAG_NO_TEXT.matcher(html);
            if (m.find()) {
                changed = true;
                html = m.replaceAll("");
            }

            m = PAT_SUPER_TAG.matcher(html);
            if (m.find()) {
                changed = true;
                // The captured markup is arbitrary document content; quote it so that
                // '$' and '\' are not interpreted as replacement-template syntax.
                html = m.replaceAll(Matcher.quoteReplacement(m.group(1)));
            }
        } while (changed);

        return html;
    }

    /**
     * Fetches the document at {@code url}, runs {@code extractor} on it and
     * returns the highlighted HTML.
     *
     * @param url the location of the HTML document
     * @param extractor the extractor applied to the parsed document
     * @return the re-serialized HTML
     * @throws IOException declared for failures while fetching the document
     * @throws BoilerpipeProcessingException if extraction or highlighting fails
     * @throws SAXException declared for failures while building the text document
     */
    public String process(URL url, BoilerpipeExtractor extractor) throws IOException, BoilerpipeProcessingException, SAXException {
        HTMLDocument htmlDoc = HTMLFetcher.fetch(url);
        TextDocument doc = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
        extractor.process(doc);
        // A second input source is requested for the highlighting pass.
        InputSource is = htmlDoc.toInputSource();
        return this.process(doc, is);
    }

    /** @return whether text not flagged as content is dropped from the output */
    public boolean isOutputHighlightOnly() {
        return this.outputHighlightOnly;
    }

    /** @param outputHighlightOnly {@code true} to drop text that is not flagged as content */
    public void setOutputHighlightOnly(boolean outputHighlightOnly) {
        this.outputHighlightOnly = outputHighlightOnly;
    }

    /** @return the markup appended when the end of the {@code HEAD} element is reached */
    public String getExtraStyleSheet() {
        return this.extraStyleSheet;
    }

    /** @param extraStyleSheet the markup to append when the end of the {@code HEAD} element is reached */
    public void setExtraStyleSheet(String extraStyleSheet) {
        this.extraStyleSheet = extraStyleSheet;
    }

    /** @return the markup written before each piece of content text */
    public String getPreHighlight() {
        return this.preHighlight;
    }

    /** @param preHighlight the markup to write before each piece of content text */
    public void setPreHighlight(String preHighlight) {
        this.preHighlight = preHighlight;
    }

    /** @return the markup written after each piece of content text */
    public String getPostHighlight() {
        return this.postHighlight;
    }

    /** @param postHighlight the markup to write after each piece of content text */
    public void setPostHighlight(String postHighlight) {
        this.postHighlight = postHighlight;
    }

    /**
     * Escapes {@code <}, {@code >}, {@code &} and {@code "} as character
     * entities.
     *
     * @param in the text to escape; may be {@code null}
     * @return the escaped text, or the empty string if {@code in} is {@code null}
     */
    private static String xmlEncode(String in) {
        if (in == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(in.length());
        for (int i = 0; i < in.length(); i++) {
            char c = in.charAt(i);
            switch (c) {
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '&':
                    out.append("&amp;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                default:
                    out.append(c);
                    break;
            }
        }
        return out.toString();
    }

    /** @return the element/attribute whitelist, or {@code null} if none is set */
    public Map<String, Set<String>> getTagWhitelist() {
        return this.tagWhitelist;
    }

    /**
     * @param tagWhitelist map from element name to the attribute names that
     *        may be emitted for it; {@code null} disables whitelisting
     */
    public void setTagWhitelist(Map<String, Set<String>> tagWhitelist) {
        this.tagWhitelist = tagWhitelist;
    }

    /**
     * SAX parser that is its own content handler and writes the re-serialized
     * document into {@link #html}. One instance is used per
     * {@code process} call.
     */
    private final class Implementation
    extends AbstractSAXParser
    implements ContentHandler {
        /** Accumulates the output markup. */
        StringBuilder html;
        /** Nesting depth of currently open suppressed elements; output is written only while this is 0. */
        private int inIgnorableElement;
        /** Number of {@code characters()} callbacks seen so far; used as the index into {@link #contentBitSet}. */
        private int characterElementIdx;
        /** Union of the contained-text-element bit sets of all blocks flagged as content. */
        private final BitSet contentBitSet;
        /** The enclosing highlighter, for access from the static tag actions. */
        private final HTMLHighlighter hl;

        Implementation() {
            super((XMLParserConfiguration)new HTMLConfiguration());
            this.html = new StringBuilder();
            this.inIgnorableElement = 0;
            this.characterElementIdx = 0;
            this.contentBitSet = new BitSet();
            this.hl = HTMLHighlighter.this;
            this.setContentHandler(this);
        }

        /**
         * Collects the text-element indexes of all content blocks and then
         * parses the input, filling {@link #html}.
         *
         * @param doc the document providing the content blocks
         * @param is the HTML to parse
         * @throws BoilerpipeProcessingException wrapping any
         *         {@link SAXException} or {@link IOException} raised by the parser
         */
        void process(TextDocument doc, InputSource is) throws BoilerpipeProcessingException {
            for (TextBlock block : doc.getTextBlocks()) {
                if (!block.isContent()) {
                    continue;
                }
                BitSet bs = block.getContainedTextElements();
                if (bs != null) {
                    this.contentBitSet.or(bs);
                }
            }
            try {
                this.parse(is);
            }
            catch (SAXException e) {
                throw new BoilerpipeProcessingException(e);
            }
            catch (IOException e) {
                throw new BoilerpipeProcessingException(e);
            }
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        @Override
        public void processingInstruction(String target, String data) throws SAXException {
        }

        @Override
        public void setDocumentLocator(Locator locator) {
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
        }

        @Override
        public void startDocument() throws SAXException {
        }

        /**
         * Writes the start tag unless a suppressed element is open or the
         * element is not whitelisted. The registered {@link TagAction}, if
         * any, is notified before and after, even when nothing is written.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            TagAction ta = TAG_ACTIONS.get(localName);
            if (ta != null) {
                ta.beforeStart(this, localName);
            }
            // A span that already carries the highlight class is emitted without its attributes.
            boolean ignoreAttrs = "SPAN".equalsIgnoreCase(localName)
                    && "x-boilerpipe-mark1".equals(atts.getValue("class"));
            try {
                if (this.inIgnorableElement == 0) {
                    Set<String> whitelistAttributes = null;
                    if (HTMLHighlighter.this.tagWhitelist != null) {
                        whitelistAttributes = HTMLHighlighter.this.tagWhitelist.get(qName);
                        if (whitelistAttributes == null) {
                            // Element is not whitelisted: emit nothing (finally still runs).
                            return;
                        }
                    }
                    this.html.append('<');
                    this.html.append(qName);
                    if (!ignoreAttrs) {
                        int numAtts = atts.getLength();
                        for (int i = 0; i < numAtts; ++i) {
                            String attr = atts.getQName(i);
                            if (whitelistAttributes != null && !whitelistAttributes.contains(attr)) {
                                continue;
                            }
                            String value = atts.getValue(i);
                            this.html.append(' ');
                            this.html.append(attr);
                            this.html.append("=\"");
                            this.html.append(HTMLHighlighter.xmlEncode(value));
                            this.html.append("\"");
                        }
                    }
                    this.html.append('>');
                }
            }
            finally {
                if (ta != null) {
                    ta.afterStart(this, localName);
                }
            }
        }

        /**
         * Writes the end tag unless a suppressed element is open or the
         * element is not whitelisted. The registered {@link TagAction}, if
         * any, is notified before and after, even when nothing is written.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            TagAction ta = TAG_ACTIONS.get(localName);
            if (ta != null) {
                ta.beforeEnd(this, localName);
            }
            try {
                if (this.inIgnorableElement == 0) {
                    Map<String, Set<String>> whitelist = HTMLHighlighter.this.tagWhitelist;
                    if (whitelist != null && !whitelist.containsKey(qName)) {
                        // Element is not whitelisted: emit nothing (finally still runs).
                        return;
                    }
                    this.html.append("</");
                    this.html.append(qName);
                    this.html.append('>');
                }
            }
            finally {
                if (ta != null) {
                    ta.afterEnd(this, localName);
                }
            }
        }

        /**
         * Writes the escaped text, wrapped in the highlight markup when its
         * index is set in {@link #contentBitSet}. The index is advanced for
         * every callback, including those inside suppressed elements.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            ++this.characterElementIdx;
            if (this.inIgnorableElement != 0) {
                return;
            }
            boolean highlight = this.contentBitSet.get(this.characterElementIdx);
            if (!highlight && HTMLHighlighter.this.outputHighlightOnly) {
                return;
            }
            if (highlight) {
                this.html.append(HTMLHighlighter.this.preHighlight);
            }
            this.html.append(HTMLHighlighter.xmlEncode(String.valueOf(ch, start, length)));
            if (highlight) {
                this.html.append(HTMLHighlighter.this.postHighlight);
            }
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }
    }

    /**
     * Hooks invoked around the handling of the start and end tag of an
     * element registered in {@link #TAG_ACTIONS}. All hooks default to no-ops.
     */
    private static abstract class TagAction {
        private TagAction() {
        }

        /** Called before the start tag is handled. */
        void beforeStart(Implementation instance, String localName) {
        }

        /** Called after the start tag has been handled. */
        void afterStart(Implementation instance, String localName) {
        }

        /** Called before the end tag is handled. */
        void beforeEnd(Implementation instance, String localName) {
        }

        /** Called after the end tag has been handled. */
        void afterEnd(Implementation instance, String localName) {
        }
    }
}

