/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.crawler;

import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.exceptions.ContentFetchException;
import edu.uci.ics.crawler4j.crawler.exceptions.PageBiggerThanMaxSizeException;
import edu.uci.ics.crawler4j.crawler.exceptions.ParseException;
import edu.uci.ics.crawler4j.crawler.exceptions.RedirectException;
import edu.uci.ics.crawler4j.fetcher.PageFetchResult;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.frontier.DocIDServer;
import edu.uci.ics.crawler4j.frontier.Frontier;
import edu.uci.ics.crawler4j.parser.NotAllowedContentException;
import edu.uci.ics.crawler4j.parser.ParseData;
import edu.uci.ics.crawler4j.parser.Parser;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.http.impl.EnglishReasonPhraseCatalog;
import uk.org.lidalia.slf4jext.Level;
import uk.org.lidalia.slf4jext.Logger;
import uk.org.lidalia.slf4jext.LoggerFactory;

public class WebCrawler
implements Runnable {
    protected static final Logger logger = LoggerFactory.getLogger(WebCrawler.class);
    protected int myId;
    protected CrawlController myController;
    private Thread myThread;
    private Parser parser;
    private PageFetcher pageFetcher;
    private RobotstxtServer robotstxtServer;
    private DocIDServer docIdServer;
    private Frontier frontier;
    private boolean isWaitingForNewURLs;

    public void init(int id, CrawlController crawlController) {
        this.myId = id;
        this.pageFetcher = crawlController.getPageFetcher();
        this.robotstxtServer = crawlController.getRobotstxtServer();
        this.docIdServer = crawlController.getDocIdServer();
        this.frontier = crawlController.getFrontier();
        this.parser = new Parser(crawlController.getConfig());
        this.myController = crawlController;
        this.isWaitingForNewURLs = false;
    }

    public int getMyId() {
        return this.myId;
    }

    public CrawlController getMyController() {
        return this.myController;
    }

    public void onStart() {
    }

    public void onBeforeExit() {
    }

    protected void handlePageStatusCode(WebURL webUrl, int statusCode, String statusDescription) {
    }

    protected WebURL handleUrlBeforeProcess(WebURL curURL) {
        return curURL;
    }

    protected void onPageBiggerThanMaxSize(String urlStr, long pageSize) {
        logger.warn("Skipping a URL: {} which was bigger ( {} ) than max allowed size", (Object)urlStr, (Object)pageSize);
    }

    protected void onUnexpectedStatusCode(String urlStr, int statusCode, String contentType, String description) {
        logger.warn("Skipping URL: {}, StatusCode: {}, {}, {}", new Object[]{urlStr, statusCode, contentType, description});
    }

    protected void onContentFetchError(WebURL webUrl) {
        logger.warn("Can't fetch content of: {}", (Object)webUrl.getURL());
    }

    protected void onParseError(WebURL webUrl) {
        logger.warn("Parsing error of: {}", (Object)webUrl.getURL());
    }

    public Object getMyLocalData() {
        return null;
    }

    /*
     * Unable to fully structure code
     */
    @Override
    public void run() {
        this.onStart();
        block2: while (true) {
            assignedURLs = new ArrayList<WebURL>(50);
            this.isWaitingForNewURLs = true;
            this.frontier.getNextURLs(50, assignedURLs);
            this.isWaitingForNewURLs = false;
            if (assignedURLs.size() == 0) {
                if (this.frontier.isFinished()) {
                    return;
                }
                try {
                    Thread.sleep(3000L);
                }
                catch (InterruptedException e) {
                    WebCrawler.logger.error("Error occurred", (Throwable)e);
                }
                continue;
            }
            var2_2 = assignedURLs.iterator();
            do {
                if (var2_2.hasNext()) ** break;
                continue block2;
                curURL = (WebURL)var2_2.next();
                if (curURL == null) continue;
                curURL = this.handleUrlBeforeProcess(curURL);
                this.processPage(curURL);
                this.frontier.setProcessed(curURL);
            } while (!this.myController.isShuttingDown());
            break;
        }
        WebCrawler.logger.info("Exiting because of controller shutdown.");
    }

    public boolean shouldVisit(Page page, WebURL url) {
        return true;
    }

    public void visit(Page page) {
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void processPage(WebURL curURL) {
        PageFetchResult fetchResult = null;
        try {
            if (curURL == null) {
                throw new Exception("Failed processing a NULL url !?");
            }
            fetchResult = this.pageFetcher.fetchPage(curURL);
            int statusCode = fetchResult.getStatusCode();
            this.handlePageStatusCode(curURL, statusCode, EnglishReasonPhraseCatalog.INSTANCE.getReason(statusCode, Locale.ENGLISH));
            Page page = new Page(curURL);
            page.setFetchResponseHeaders(fetchResult.getResponseHeaders());
            page.setStatusCode(statusCode);
            if (statusCode != 200) {
                if (statusCode == 301 || statusCode == 302 || statusCode == 300 || statusCode == 303 || statusCode == 307 || statusCode == 308) {
                    page.setRedirect(true);
                    if (this.myController.getConfig().isFollowRedirects()) {
                        String movedToUrl = fetchResult.getMovedToUrl();
                        if (movedToUrl == null) {
                            throw new RedirectException(Level.WARN, "Unexpected error, URL: " + curURL + " is redirected to NOTHING");
                        }
                        page.setRedirectedToUrl(movedToUrl);
                        int newDocId = this.docIdServer.getDocId(movedToUrl);
                        if (newDocId > 0) {
                            throw new RedirectException(Level.DEBUG, "Redirect page: " + curURL + " is already seen");
                        }
                        WebURL webURL = new WebURL();
                        webURL.setURL(movedToUrl);
                        webURL.setParentDocid(curURL.getParentDocid());
                        webURL.setParentUrl(curURL.getParentUrl());
                        webURL.setDepth(curURL.getDepth());
                        webURL.setDocid(-1);
                        webURL.setAnchor(curURL.getAnchor());
                        if (this.shouldVisit(page, webURL)) {
                            if (this.robotstxtServer.allows(webURL)) {
                                webURL.setDocid(this.docIdServer.getNewDocID(movedToUrl));
                                this.frontier.schedule(webURL);
                            } else {
                                logger.debug("Not visiting: {} as per the server's \"robots.txt\" policy", (Object)webURL.getURL());
                            }
                        } else {
                            logger.debug("Not visiting: {} as per your \"shouldVisit\" policy", (Object)webURL.getURL());
                        }
                    }
                } else {
                    String description = EnglishReasonPhraseCatalog.INSTANCE.getReason(fetchResult.getStatusCode(), Locale.ENGLISH);
                    String contentType = fetchResult.getEntity() == null ? "" : fetchResult.getEntity().getContentType().getValue();
                    this.onUnexpectedStatusCode(curURL.getURL(), fetchResult.getStatusCode(), contentType, description);
                }
            } else {
                if (!curURL.getURL().equals(fetchResult.getFetchedUrl())) {
                    if (this.docIdServer.isSeenBefore(fetchResult.getFetchedUrl())) {
                        throw new RedirectException(Level.DEBUG, "Redirect page: " + curURL + " has already been seen");
                    }
                    curURL.setURL(fetchResult.getFetchedUrl());
                    curURL.setDocid(this.docIdServer.getNewDocID(fetchResult.getFetchedUrl()));
                }
                if (!fetchResult.fetchContent(page)) {
                    throw new ContentFetchException();
                }
                this.parser.parse(page, curURL.getURL());
                ParseData parseData = page.getParseData();
                ArrayList<WebURL> toSchedule = new ArrayList<WebURL>();
                int maxCrawlDepth = this.myController.getConfig().getMaxDepthOfCrawling();
                for (WebURL webURL : parseData.getOutgoingUrls()) {
                    webURL.setParentDocid(curURL.getDocid());
                    webURL.setParentUrl(curURL.getURL());
                    int newdocid = this.docIdServer.getDocId(webURL.getURL());
                    if (newdocid > 0) {
                        webURL.setDepth((short)-1);
                        webURL.setDocid(newdocid);
                        continue;
                    }
                    webURL.setDocid(-1);
                    webURL.setDepth((short)(curURL.getDepth() + 1));
                    if (maxCrawlDepth != -1 && curURL.getDepth() >= maxCrawlDepth) continue;
                    if (this.shouldVisit(page, webURL)) {
                        if (this.robotstxtServer.allows(webURL)) {
                            webURL.setDocid(this.docIdServer.getNewDocID(webURL.getURL()));
                            toSchedule.add(webURL);
                            continue;
                        }
                        logger.debug("Not visiting: {} as per the server's \"robots.txt\" policy", (Object)webURL.getURL());
                        continue;
                    }
                    logger.debug("Not visiting: {} as per your \"shouldVisit\" policy", (Object)webURL.getURL());
                }
                this.frontier.scheduleAll(toSchedule);
                this.visit(page);
            }
        }
        catch (PageBiggerThanMaxSizeException e) {
            this.onPageBiggerThanMaxSize(curURL.getURL(), e.getPageSize());
        }
        catch (ParseException pe) {
            this.onParseError(curURL);
        }
        catch (ContentFetchException cfe) {
            this.onContentFetchError(curURL);
        }
        catch (RedirectException re) {
            logger.log(re.level, re.getMessage());
        }
        catch (NotAllowedContentException nace) {
            logger.debug("Skipping: {} as it contains binary content which you configured not to crawl", (Object)curURL.getURL());
        }
        catch (Exception e) {
            String urlStr = curURL == null ? "NULL" : curURL.getURL();
            logger.error("{}, while processing: {}", (Object)e.getMessage(), (Object)urlStr);
            logger.debug("Stacktrace", (Throwable)e);
        }
        finally {
            if (fetchResult != null) {
                fetchResult.discardContentIfNotConsumed();
            }
        }
    }

    public Thread getThread() {
        return this.myThread;
    }

    public void setThread(Thread myThread) {
        this.myThread = myThread;
    }

    public boolean isNotWaitingForNewURLs() {
        return !this.isWaitingForNewURLs;
    }
}

