/*
 * Decompiled with CFR 0.152.
 */
package nl.basjes.parse.httpdlog.dissectors;

import java.net.URI;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Dissector;
import nl.basjes.parse.core.Parsable;
import nl.basjes.parse.core.ParsedField;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.lang3.StringEscapeUtils;

/**
 * Dissector that takes an {@code HTTP.URI} value and emits its protocol, userinfo, host, port,
 * path, query string and fragment ("ref") as separate dissections.
 *
 * <p>Before the value is handed to {@link URI#create(String)} it is encoded and then passed
 * through a series of textual repairs (see {@link #repairUri(String)}) so that a number of
 * malformed inputs can still be parsed.
 *
 * <p>Only outputs that were requested through {@link #prepareForDissect(String, String)} are
 * emitted.
 */
public class HttpUriDissector extends Dissector {
    private static final String INPUT_TYPE = "HTTP.URI";

    /**
     * Bit set handed to {@code URIUtil.encode} as its second argument.
     * NOTE(review): that argument is presumably the set of characters that are left
     * unescaped - confirm against the commons-httpclient documentation.
     */
    private static final BitSet ALLOWED_URI_CHARS = buildAllowedUriChars();

    /** A '%' that is not followed by two hexadecimal digits (including a '%' at or near the end). */
    private static final Pattern BAD_ESCAPE_PATTERN =
            Pattern.compile("%([^0-9a-fA-F]|[0-9a-fA-F][^0-9a-fA-F]|.$|$)");

    /** The literal sequence "=#". */
    private static final Pattern EQUALS_HASH_PATTERN = Pattern.compile("=#");

    /** The literal sequence "#&". */
    private static final Pattern HASH_AMP_PATTERN = Pattern.compile("#&");

    /** Two '#' characters with anything (except line terminators) in between. */
    private static final Pattern DOUBLE_HASH_PATTERN = Pattern.compile("#(.*)#");

    /** Something shaped like "#xHH;" whose preceding character is not '&'. */
    private static final Pattern ALMOST_HTML_ENCODED =
            Pattern.compile("([^&])(#x[0-9a-fA-F][0-9a-fA-F];)");

    // One flag per possible output; switched on by prepareForDissect and never switched off.
    private boolean wantProtocol = false;
    private boolean wantUserinfo = false;
    private boolean wantHost = false;
    private boolean wantPort = false;
    private boolean wantPath = false;
    private boolean wantQuery = false;
    private boolean wantRef = false;

    /**
     * Builds the bit set used when encoding the incoming value: bits 0..254 are set, after which
     * the "unwise", "space" and "control" sets of commons-httpclient and the characters
     * '&lt;', '&gt;' and '"' are cleared again.
     */
    private static BitSet buildAllowedUriChars() {
        BitSet chars = new BitSet(256);
        // The end index of BitSet.set is exclusive, so bit 255 stays clear.
        chars.set(0, 255);
        chars.andNot(org.apache.commons.httpclient.URI.unwise);
        chars.andNot(org.apache.commons.httpclient.URI.space);
        chars.andNot(org.apache.commons.httpclient.URI.control);
        chars.clear('<');
        chars.clear('>');
        chars.clear('"');
        return chars;
    }

    /**
     * @return the type of field this dissector consumes, {@code "HTTP.URI"}
     */
    public String getInputType() {
        return INPUT_TYPE;
    }

    /**
     * @return a new mutable list with the seven {@code TYPE:name} outputs this dissector can emit
     */
    public List<String> getPossibleOutput() {
        String[] outputs = {
            "HTTP.PROTOCOL:protocol",
            "HTTP.USERINFO:userinfo",
            "HTTP.HOST:host",
            "HTTP.PORT:port",
            "HTTP.PATH:path",
            "HTTP.QUERYSTRING:query",
            "HTTP.REF:ref",
        };
        List<String> possible = new ArrayList<String>();
        for (String output : outputs) {
            possible.add(output);
        }
        return possible;
    }

    /**
     * Marks one output as wanted so that {@link #dissect(Parsable, String)} will emit it.
     *
     * @param inputname  name of the input field
     * @param outputname name of the requested output field
     * @return {@code Casts.STRING_OR_LONG} for the port, {@code Casts.STRING_ONLY} for every
     *         other recognised output, or {@code null} when the extracted field name is not one
     *         of the outputs of this dissector
     */
    public EnumSet<Casts> prepareForDissect(String inputname, String outputname) {
        String wanted = extractFieldName(inputname, outputname);

        // The port is the only output that can also be delivered as a number.
        if ("port".equals(wanted)) {
            wantPort = true;
            return Casts.STRING_OR_LONG;
        }

        boolean recognised = true;
        if ("protocol".equals(wanted)) {
            wantProtocol = true;
        } else if ("userinfo".equals(wanted)) {
            wantUserinfo = true;
        } else if ("host".equals(wanted)) {
            wantHost = true;
        } else if ("path".equals(wanted)) {
            wantPath = true;
        } else if ("query".equals(wanted)) {
            wantQuery = true;
        } else if ("ref".equals(wanted)) {
            wantRef = true;
        } else {
            recognised = false;
        }
        return recognised ? Casts.STRING_ONLY : null;
    }

    /**
     * Reads the {@code HTTP.URI} field called {@code inputname}, parses it and adds a dissection
     * for every output that was requested earlier.
     *
     * <p>A {@code null} or empty value produces no output at all. A value that starts with '/'
     * (after the repairs) is parsed with a dummy protocol and host in front of it; in that case
     * protocol, userinfo, host and port are never emitted. The port is also skipped when the
     * parsed URI reports -1 for it. A missing query string is emitted as the empty string.
     *
     * @param parsable  the record that holds the input field and receives the dissections
     * @param inputname name of the field to dissect
     * @throws DissectionFailure when encoding the value fails or the repaired value is rejected
     *                           by {@link URI#create(String)}
     */
    public void dissect(Parsable<?> parsable, String inputname) throws DissectionFailure {
        final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
        final String rawValue = field.getValue().getString();
        if (rawValue == null || rawValue.isEmpty()) {
            return;
        }

        final String encoded;
        try {
            encoded = URIUtil.encode(rawValue, ALLOWED_URI_CHARS, "UTF-8");
        } catch (URIException e) {
            throw new DissectionFailure(failureMessage(field, e));
        }

        final String repaired = repairUri(encoded);

        // A value starting with '/' has no protocol or host of its own; give it placeholders so
        // it can be parsed, and remember not to report those placeholders.
        final boolean startsWithSlash = repaired.charAt(0) == '/';
        final URI uri;
        try {
            uri = URI.create(startsWithSlash ? "dummy-protocol://dummy.host.name" + repaired : repaired);
        } catch (IllegalArgumentException e) {
            throw new DissectionFailure(failureMessage(field, e));
        }

        if (wantQuery) {
            String rawQuery = uri.getRawQuery();
            parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", rawQuery == null ? "" : rawQuery);
        }
        if (wantPath) {
            parsable.addDissection(inputname, "HTTP.PATH", "path", uri.getPath());
        }
        if (wantRef) {
            parsable.addDissection(inputname, "HTTP.REF", "ref", uri.getFragment());
        }

        if (startsWithSlash) {
            return;
        }

        if (wantProtocol) {
            parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", uri.getScheme());
        }
        if (wantUserinfo) {
            parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", uri.getUserInfo());
        }
        if (wantHost) {
            parsable.addDissection(inputname, "HTTP.HOST", "host", uri.getHost());
        }
        if (wantPort) {
            int port = uri.getPort();
            if (port != -1) {
                parsable.addDissection(inputname, "HTTP.PORT", "port", port);
            }
        }
    }

    /**
     * Builds the message used for every parse failure; it quotes the value of the field as it
     * was received, followed by the message of the underlying exception.
     */
    private static String failureMessage(ParsedField field, Exception cause) {
        return "Failed to parse URI >>" + field.getValue().getString() + "<< because of : " + cause.getMessage();
    }

    /**
     * Applies, in a fixed order, the textual repairs that make a number of malformed URIs
     * acceptable to {@link URI#create(String)}. The order of the steps matters.
     *
     * @param encoded the already encoded URI text
     * @return the repaired URI text
     */
    private static String repairUri(String encoded) {
        String repaired = encoded;

        // As soon as there is any '?' or '&': turn every '?' into '&' and then put a single '?'
        // in front of the first '&', so the query part always starts with "?&".
        if (repaired.indexOf('?') != -1 || repaired.indexOf('&') != -1) {
            repaired = repaired.replaceAll("\\?", "&").replaceFirst("&", "?&");
        }

        // Escape each '%' that does not start a valid two digit hex escape. This runs twice
        // because a '%' that was consumed as the trailing character of a previous match
        // (for example in "%%") is only picked up by the second pass.
        repaired = BAD_ESCAPE_PATTERN.matcher(repaired).replaceAll("%25$1");
        repaired = BAD_ESCAPE_PATTERN.matcher(repaired).replaceAll("%25$1");

        // Put the missing '&' in front of "#xHH;" and then let the HTML unescaper decode it.
        repaired = ALMOST_HTML_ENCODED.matcher(repaired).replaceAll("$1&$2");
        repaired = StringEscapeUtils.unescapeHtml4(repaired);

        // Drop the '#' from "=#" and from "#&".
        repaired = EQUALS_HASH_PATTERN.matcher(repaired).replaceAll("=");
        repaired = HASH_AMP_PATTERN.matcher(repaired).replaceAll("&");

        // While the pattern still finds two '#', replace the leading one of each match by '~'
        // and keep the trailing one.
        Matcher doubleHash = DOUBLE_HASH_PATTERN.matcher(repaired);
        while (doubleHash.find()) {
            repaired = doubleHash.replaceAll("~$1#");
            doubleHash = DOUBLE_HASH_PATTERN.matcher(repaired);
        }
        return repaired;
    }
}

