package org.archive.wayback.resourcestore.indexer;

import com.lowagie.text.ElementTags;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.Header;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.util.htmllex.ContextAwareLexer;
import org.archive.wayback.util.htmllex.ParseContext;
import org.archive.wayback.util.htmllex.ParseEventDelegator;
import org.htmlparser.Node;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.ParserException;

/* loaded from: input_file:WEB-INF/lib/wayback-core-1.7.1-SNAPSHOT.jar:org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.class */
/**
 * Annotates a {@link CaptureSearchResult} with information derived from an
 * HTTP response: the MIME type (from the {@code Content-Type} header or a
 * caller-supplied guess) and robot directives (from the {@code X-Robots-Tag}
 * header and, for HTML documents, from whatever the registered
 * {@link RobotMetaRule} collects while the body is lexed).
 *
 * <p>Each instance owns a single {@link RobotMetaFlags} object that is reset at
 * the start of every {@link #annotateHTTPContent} call and shared with the
 * parse rule, so one instance should not be used by several threads at once.
 */
public class HTTPRecordAnnotater {
    private static final Logger LOGGER = Logger.getLogger(HTTPRecordAnnotater.class.getName());
    private static final String CONTENT_TYPE_HEADER = "Content-Type";
    private static final String ROBOTS_TAG_HEADER = "X-Robots-Tag";
    /** Lower-case substrings whose presence in a MIME type marks it as HTML. */
    private static final String[] mimes = {"html"};

    private final RobotMetaRule rule;
    private final ParseEventDelegator rules;
    private final RobotMetaFlags robotFlags;

    /**
     * Builds the parse-event delegator and registers a {@link RobotMetaRule}
     * on it that writes into this annotater's shared {@link RobotMetaFlags}.
     */
    public HTTPRecordAnnotater() {
        this.rules = new ParseEventDelegator();
        this.rules.init();
        this.rule = new RobotMetaRule();
        this.robotFlags = new RobotMetaFlags();
        this.rule.setRobotFlags(this.robotFlags);
        this.rule.visit(this.rules);
    }

    /**
     * Tells whether a MIME type string denotes HTML content.
     *
     * @param str MIME type to test; may be {@code null}
     * @return {@code true} if the lower-cased value contains {@code "html"};
     *         {@code false} otherwise, including for {@code null}
     */
    public boolean isHTML(String str) {
        if (str == null) {
            return false;
        }
        String lowerCase = str.toLowerCase();
        for (String marker : mimes) {
            if (lowerCase.contains(marker)) {
                return true;
            }
        }
        return false;
    }

    /** Replaces every space with {@code %20}. */
    private String escapeSpaces(String str) {
        return str.contains(" ") ? str.replace(" ", "%20") : str;
    }

    /**
     * Normalizes a raw {@code Content-Type} value to a bare MIME type.
     *
     * <p>Everything from the first {@code ';'} onward (parameters such as
     * {@code charset}) is dropped, the remainder is trimmed, and any remaining
     * spaces are replaced with {@code %20}. If the {@code ';'} is the very
     * first character, nothing is dropped and the whole trimmed value is kept.
     *
     * @param str raw header value; may be {@code null}
     * @return the normalized MIME type, or {@code null} if {@code str} is {@code null}
     */
    public String transformHTTPMime(String str) {
        if (str == null) {
            return null;
        }
        int semicolon = str.indexOf(";");
        String mime = semicolon > 0 ? str.substring(0, semicolon) : str;
        return escapeSpaces(mime.trim());
    }

    /**
     * Annotates a capture from its HTTP headers and, for HTML, its body.
     *
     * <p>The MIME type is taken from the {@code Content-Type} header when it
     * yields a value, otherwise from {@code str}, otherwise it falls back to
     * {@link ElementTags#UNKNOWN}. {@code X-Robots-Tag} header values are fed
     * to the robot flags. If the resulting MIME type is HTML, the body is lexed
     * as {@code utf-8}. Finally the accumulated robot flags are applied to the
     * capture.
     *
     * @param captureSearchResult capture to annotate
     * @param inputStream response body; only read when the MIME type is HTML
     * @param headerArr HTTP response headers; may be {@code null}
     * @param str fallback MIME type guess, used when no usable
     *        {@code Content-Type} header is present; may be {@code null}
     */
    public void annotateHTTPContent(CaptureSearchResult captureSearchResult, InputStream inputStream, Header[] headerArr, String str) {
        this.robotFlags.reset();
        String mimeType = null;
        if (headerArr != null) {
            for (Header header : headerArr) {
                String name = header.getName();
                if (name == null) {
                    continue;
                }
                // HTTP header names are case-insensitive; all other headers are ignored.
                if (CONTENT_TYPE_HEADER.equalsIgnoreCase(name)) {
                    mimeType = transformHTTPMime(header.getValue());
                } else if (ROBOTS_TAG_HEADER.equalsIgnoreCase(name)) {
                    String value = header.getValue();
                    if (value != null) {
                        this.robotFlags.parse(value);
                    }
                }
            }
        }
        if (mimeType == null) {
            mimeType = transformHTTPMime(str);
        }
        if (mimeType == null) {
            mimeType = ElementTags.UNKNOWN;
        }
        captureSearchResult.setMimeType(mimeType);
        if (isHTML(mimeType)) {
            String recordId = captureSearchResult.getFile() + ":" + captureSearchResult.getOffset();
            annotateHTMLContent(inputStream, "utf-8", recordId, captureSearchResult);
        }
        this.robotFlags.apply(captureSearchResult);
    }

    /**
     * Lexes an HTML document and passes every node to the registered parse
     * rules, then signals parse completion.
     *
     * <p>Failures are logged as warnings and never propagated; on failure the
     * parse-complete callback is not invoked.
     *
     * @param inputStream HTML document bytes
     * @param str character set name used to decode {@code inputStream}
     * @param str2 identifier of the record, used only as a prefix in log messages
     * @param captureSearchResult capture being annotated; not read by this method
     */
    public void annotateHTMLContent(InputStream inputStream, String str, String str2, CaptureSearchResult captureSearchResult) {
        ParseContext parseContext = new ParseContext();
        try {
            ContextAwareLexer lexer = new ContextAwareLexer(new Lexer(new Page(inputStream, str)), parseContext);
            Node node;
            while ((node = lexer.nextNode()) != null) {
                this.rules.handleNode(parseContext, node);
            }
            this.rules.handleParseComplete(parseContext);
        } catch (UnsupportedEncodingException e) {
            LOGGER.log(Level.WARNING, str2 + " " + e.getLocalizedMessage(), e);
        } catch (IOException e) {
            // I/O problems are reported by message only, without a stack trace.
            LOGGER.warning(str2 + " " + e.getLocalizedMessage());
        } catch (ParserException e) {
            LOGGER.log(Level.WARNING, str2 + " " + e.getLocalizedMessage(), e);
        }
    }
}
