/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.robotstxt;

import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.exceptions.PageBiggerThanMaxSizeException;
import edu.uci.ics.crawler4j.fetcher.PageFetchResult;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.HostDirectives;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtParser;
import edu.uci.ics.crawler4j.url.WebURL;
import edu.uci.ics.crawler4j.util.Util;
import java.net.MalformedURLException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.http.NoHttpResponseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Answers whether a URL may be crawled according to the robots.txt of its host.
 *
 * <p>Parsed directives are kept in an in-memory cache keyed by lower-cased host name. Every
 * access to the cache made by this class happens while holding the cache map's own monitor.
 */
public class RobotstxtServer {
    private static final Logger logger = LoggerFactory.getLogger(RobotstxtServer.class);

    protected RobotstxtConfig config;

    /** Host name (lower case) to parsed directives; guard all access with {@code synchronized} on the map. */
    protected final Map<String, HostDirectives> host2directivesCache = new HashMap<>();

    protected PageFetcher pageFetcher;

    /**
     * @param config      robots.txt settings (enabled flag, user-agent name, cache size)
     * @param pageFetcher fetcher used to download robots.txt files
     */
    public RobotstxtServer(RobotstxtConfig config, PageFetcher pageFetcher) {
        this.config = config;
        this.pageFetcher = pageFetcher;
    }

    /**
     * Returns the host of {@code url} in lower case. {@link Locale#ROOT} is used so the result
     * does not depend on the JVM's default locale.
     */
    private static String getHost(URL url) {
        return url.getHost().toLowerCase(Locale.ROOT);
    }

    /**
     * Builds the robots.txt location for the host of {@code url}. The scheme is {@code https}
     * when the URL itself uses https and {@code http} otherwise; a non-default port is kept.
     */
    private static String robotsTxtLocation(URL url, String host) {
        String scheme = "https".equalsIgnoreCase(url.getProtocol()) ? "https" : "http";
        int port = url.getPort();
        String portSuffix = (port == -1 || port == url.getDefaultPort()) ? "" : ":" + port;
        return scheme + "://" + host + portSuffix + "/robots.txt";
    }

    /**
     * Decides whether {@code webURL} may be fetched.
     *
     * @param webURL the URL to check
     * @return {@code true} when robots.txt handling is disabled in the config, when the URL
     *         string cannot be parsed, or when the host's directives allow the URL's path;
     *         otherwise the result of {@code HostDirectives.allows(path)}
     */
    public boolean allows(WebURL webURL) {
        if (!this.config.isEnabled()) {
            return true;
        }
        try {
            URL url = new URL(webURL.getURL());
            String host = getHost(url);
            HostDirectives directives;
            // Read and invalidate under the same lock that guards writes to the cache.
            synchronized (this.host2directivesCache) {
                directives = this.host2directivesCache.get(host);
                if (directives != null && directives.needsRefetch()) {
                    this.host2directivesCache.remove(host);
                    directives = null;
                }
            }
            if (directives == null) {
                // Network I/O is deliberately done outside the lock.
                directives = fetchDirectives(url);
            }
            return directives.allows(url.getPath());
        } catch (MalformedURLException e) {
            logger.error("Bad URL in Robots.txt: " + webURL.getURL(), e);
            return true;
        }
    }

    /**
     * Downloads and parses robots.txt for the host of {@code url}, stores the result in the
     * cache and returns it.
     *
     * <p>If the file cannot be fetched, has a non-200 status, or has a content type that is
     * neither plain text nor html, a default {@code new HostDirectives()} is cached and returned
     * instead.
     *
     * @param url any URL on the host whose robots.txt is wanted
     * @return the directives now cached for the host; never {@code null}
     */
    private HostDirectives fetchDirectives(URL url) {
        String host = getHost(url);
        WebURL robotsTxtUrl = new WebURL();
        robotsTxtUrl.setURL(robotsTxtLocation(url, host));

        HostDirectives directives = null;
        PageFetchResult fetchResult = null;
        try {
            fetchResult = this.pageFetcher.fetchPage(robotsTxtUrl);
            if (fetchResult.getStatusCode() == 200) {
                Page page = new Page(robotsTxtUrl);
                fetchResult.fetchContent(page);
                String contentType = page.getContentType();
                if (Util.hasPlainTextContent(contentType)) {
                    String content = page.getContentCharset() == null
                            ? new String(page.getContentData())
                            : new String(page.getContentData(), page.getContentCharset());
                    directives = RobotstxtParser.parse(content, this.config.getUserAgentName());
                } else if (contentType != null && contentType.contains("html")) {
                    // Some servers label robots.txt as html; parse it anyway.
                    String content = new String(page.getContentData());
                    directives = RobotstxtParser.parse(content, this.config.getUserAgentName());
                } else {
                    logger.warn("Can't read this robots.txt: {}  as it is not written in plain text, contentType: {}",
                            robotsTxtUrl.getURL(), contentType);
                }
            } else {
                logger.debug("Can't read this robots.txt: {}  as it's status code is {}",
                        robotsTxtUrl.getURL(), fetchResult.getStatusCode());
            }
        } catch (SocketException | SocketTimeoutException | UnknownHostException | NoHttpResponseException se) {
            // Best effort: an unreachable host is treated like a host without robots.txt,
            // so this is only traced instead of being reported as an error.
            logger.trace("Could not fetch robots.txt: " + robotsTxtUrl.getURL(), se);
        } catch (PageBiggerThanMaxSizeException pbtms) {
            logger.error("Error occurred while fetching (robots) url: {}, {}",
                    robotsTxtUrl.getURL(), pbtms.getMessage());
        } catch (Exception e) {
            logger.error("Error occurred while fetching (robots) url: " + robotsTxtUrl.getURL(), e);
        } finally {
            if (fetchResult != null) {
                fetchResult.discardContentIfNotConsumed();
            }
        }

        if (directives == null) {
            directives = new HostDirectives();
        }
        cacheDirectives(host, directives);
        return directives;
    }

    /**
     * Stores {@code directives} for {@code host}. When the host is new and the cache has reached
     * the configured size, the entry with the smallest last-access time is removed first.
     */
    private void cacheDirectives(String host, HostDirectives directives) {
        synchronized (this.host2directivesCache) {
            boolean isNewHost = !this.host2directivesCache.containsKey(host);
            // ">=" rather than "==" so a cache that is already over the limit still evicts.
            if (isNewHost && this.host2directivesCache.size() >= this.config.getCacheSize()) {
                String oldestHost = null;
                long oldestAccess = Long.MAX_VALUE;
                for (Map.Entry<String, HostDirectives> entry : this.host2directivesCache.entrySet()) {
                    long accessTime = entry.getValue().getLastAccessTime();
                    if (accessTime < oldestAccess) {
                        oldestAccess = accessTime;
                        oldestHost = entry.getKey();
                    }
                }
                if (oldestHost != null) {
                    this.host2directivesCache.remove(oldestHost);
                }
            }
            this.host2directivesCache.put(host, directives);
        }
    }
}

