/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.crawler.web.bucket;

import com.ibm.es.nuvo.crawler.util.hash.Hash64;
import com.ibm.es.nuvo.crawler.web.bucket.Bucket;
import com.ibm.es.nuvo.crawler.web.bucket.DNSResolver;
import com.ibm.es.nuvo.crawler.web.config.InvalidRuleException;
import com.ibm.es.nuvo.crawler.web.db.CrawlRec;
import com.ibm.es.nuvo.crawler.web.db.tables.RobotsTable;
import com.ibm.es.nuvo.crawler.web.http.Client;
import com.ibm.es.nuvo.crawler.web.http.DownloadFailedException;
import com.ibm.es.nuvo.crawler.web.http.FetchIncompleteLineException;
import com.ibm.es.nuvo.crawler.web.http.HTTPCategory;
import com.ibm.es.nuvo.crawler.web.http.URLSorter;
import com.ibm.es.nuvo.crawler.web.net.CURL;
import com.ibm.es.nuvo.crawler.web.rule.RobotsTxtParser;
import com.ibm.es.nuvo.crawler.web.util.StreamUtils;
import com.ibm.es.nuvo.logging.ExtendedLogger;
import com.ibm.es.nuvo.logging.Loggers;
import java.io.IOException;
import java.io.Reader;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.net.ssl.SSLHandshakeException;

/**
 * Singleton that downloads the robots.txt of a {@link Bucket} (host), follows
 * redirects to other robots.txt URLs, and records the outcome on the bucket
 * and, when the bucket manager is persistent, in the {@link RobotsTable}.
 */
public class RobotsResolver {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    private static final Logger logger = Loggers.logger;
    private static final ExtendedLogger tracer = ExtendedLogger.getLogger("NuvoTracer." + RobotsResolver.class.getName());
    public static final String S_ROBOTS_ENCODING = "ISO-8859-1";
    private static final RobotsResolver s_instance = new RobotsResolver();
    /** Upper bound on the number of distinct URLs visited while following redirects. */
    private static final int MAX_REDIRECT = 10;

    private RobotsResolver() {
    }

    /**
     * @return the shared resolver instance
     */
    public static RobotsResolver instance() {
        return s_instance;
    }

    /**
     * Downloads robots.txt for the bucket using a fresh, private lock.
     *
     * @param bucket the host bucket whose robots.txt is fetched
     */
    public void downloadRobots(Bucket bucket) {
        this.downloadRobots(bucket, new ReentrantLock());
    }

    /**
     * Downloads robots.txt for the bucket and updates the bucket's robots state.
     * <p>
     * Each socket address of the bucket is tried until one yields an HTTP answer.
     * A redirect to another robots.txt URL is followed through the bucket that
     * owns the new URL; a redirect to a URL that is not a robots.txt is treated
     * as a 404. The fail counter of the bucket is reset or incremented according
     * to the final outcome, and the crawl record is always handed to the
     * {@link URLSorter} when the bucket manager is persistent.
     *
     * @param bucket the host bucket whose robots.txt is fetched
     * @param lock   lock taken around the RobotsTable update and the bucket lookup,
     *               and passed on to the download, DNS resolution and result processing
     */
    public void downloadRobots(Bucket bucket, Lock lock) {
        if (tracer.isLoggable(Level.FINE)) {
            tracer.fine("Start to download robots.txt from " + bucket.getHostname());
        }
        CURL url = bucket.getRobotsTxtURL();
        if (url == null || !url.isValid()) {
            if (tracer.isLoggable(Level.FINE)) {
                // Concatenate the reference itself: url may be null on this branch.
                tracer.log(Level.FINE, "An invalid URL" + url + " is generated for host " + bucket.toString());
            }
            bucket.setRobotsTxt(null);
            return;
        }
        bucket.startRobotsDownload(bucket.getTimeout());
        CrawlRec cr = null;
        try {
            cr = new CrawlRec();
            cr.setURL(url);
            cr.setBucket(bucket);
            boolean connected = false;
            boolean truncated = false;
            bucket.updateRobotsDate();
            int rc = 0;
            HashSet<Hash64> redirectURIs = new HashSet<Hash64>();
            boolean redirect = false;
            CURL orgURL = cr.getURL();
            // The retry limit is 1, so the body runs once and the sleep below is currently never reached.
            for (int retryCounter = 0; retryCounter < 1 && !connected; ++retryCounter) {
                if (retryCounter > 0) {
                    try {
                        Thread.sleep(2000L);
                    }
                    catch (InterruptedException ie) {
                        Thread.currentThread().interrupt();
                    }
                }
                Iterator<InetSocketAddress> it = bucket.getIP();
                redirectURIs.clear();
                redirectURIs.add(orgURL.getURLHash());
                Bucket redirectBucket = null;
                while (redirectURIs.size() < MAX_REDIRECT) {
                    redirect = false;
                    while (!connected && !redirect && it.hasNext()) {
                        InetSocketAddress isa = it.next();
                        Client client = null;
                        try {
                            cr.setInetSocketAddress(isa);
                            client = new Client(cr);
                            try {
                                client.download(lock);
                            }
                            finally {
                                cr.deregister();
                            }
                            if (!bucket.getConfig().isValid()) {
                                // NOTE(review): this exit skips bucket.stopRobotsDownload(); confirm that is intended.
                                return;
                            }
                            truncated = client.isTruncated();
                            rc = client.getHTTPReturnCode();
                            if (!HTTPCategory.talkedToServer(rc) && 500 != rc) {
                                break;
                            }
                            connected = true;
                            if (HTTPCategory.successful(rc)) {
                                Reader reader = null;
                                String robotsTxt = null;
                                try {
                                    reader = client.getContentAsReader(null);
                                    robotsTxt = RobotsTxtParser.parseRaw(reader, bucket.getConfig().getAgent().getName());
                                    if (tracer.isLoggable(Level.FINE)) {
                                        tracer.log(Level.FINE, url + " " + robotsTxt);
                                        if (tracer.isLoggable(Level.FINER)) {
                                            tracer.finer("Original text\n" + client.getContentString());
                                        }
                                    }
                                }
                                finally {
                                    StreamUtils.safeClose(reader);
                                    client.clear();
                                }
                                // Rules that are null or shorter than 31743 characters go to the
                                // RobotsTable when persistent; anything else is kept on the bucket
                                // and the bucket is flagged as having large robots rules.
                                if (bucket.getManager().isPersistent() && (robotsTxt == null || robotsTxt.length() < 31743)) {
                                    lock.lock();
                                    try {
                                        RobotsTable.getInstance(cr.getCrawlerId()).updateRobotText(bucket, robotsTxt);
                                    }
                                    finally {
                                        lock.unlock();
                                    }
                                    bucket.setLargeRobots(false);
                                } else {
                                    if (tracer.isLoggable(Level.FINER)) {
                                        tracer.finer("Robots.txt is too large " + cr.getURL());
                                    }
                                    bucket.setRobotsTxt(robotsTxt);
                                    bucket.setLargeRobots(true);
                                }
                                break;
                            }
                            if (!HTTPCategory.redirect(rc)) {
                                break;
                            }
                            // A redirect is not a final answer yet.
                            connected = false;
                            CURL newURL = client.getRedirectURL();
                            client.clear();
                            // Set.add returns false for an already visited URL, i.e. a redirect cycle.
                            if (newURL == null || !newURL.isValid() || !redirectURIs.add(newURL.getURLHash())) {
                                break;
                            }
                            if (!newURL.isRobots()) {
                                // Redirected away from robots.txt: handled as "not found".
                                rc = 404;
                                connected = true;
                                break;
                            }
                            lock.lock();
                            try {
                                redirectBucket = bucket.getManager().bucketForURL(newURL);
                            }
                            finally {
                                lock.unlock();
                            }
                            if (redirectBucket == null) {
                                if (tracer.isLoggable(Level.FINE)) {
                                    tracer.log(Level.FINE, "G0002I.TRACE_MESSAGE", new Object[]{newURL + " " + cr.getURL().toString()});
                                }
                                break;
                            }
                            if (redirectBucket.needsDNSUpdate()) {
                                DNSResolver.instance().resolve(redirectBucket, lock);
                            }
                            if (!redirectBucket.hasIP()) {
                                break;
                            }
                            it = redirectBucket.getIP();
                            if (tracer.isLoggable(Level.FINE)) {
                                // cr still carries the URL that issued the redirect at this point.
                                tracer.log(Level.FINE, "Robots.txt for " + cr.getURL().toString() + " was redirected to " + newURL + ".");
                            }
                            cr.setURL(newURL);
                            cr.setBucket(redirectBucket);
                            // The client was cleared above; null it so the finally block does not clear it again.
                            client = null;
                            redirect = true;
                            break;
                        }
                        catch (DownloadFailedException e) {
                            connected = false;
                            if (tracer.isLoggable(Level.FINE)) {
                                this.logDownloadFail(url, isa, e);
                            }
                        }
                        catch (FetchIncompleteLineException e) {
                            connected = false;
                            if (tracer.isLoggable(Level.FINE)) {
                                tracer.log(Level.FINE, "fetching robots.txt " + e.getMessage());
                            }
                        }
                        catch (InvalidRuleException e) {
                            // Only a text/plain body that fails to parse counts as a failed attempt.
                            String contentType = client.getContentType();
                            if (contentType != null && contentType.equalsIgnoreCase("text/plain")) {
                                connected = false;
                                if (tracer.isLoggable(Level.FINE)) {
                                    tracer.log(Level.FINE, "parsing robots.txt", e);
                                }
                            }
                        }
                        catch (Exception e) {
                            if (logger.isLoggable(Level.SEVERE)) {
                                logger.log(Level.SEVERE, "C4807E.ROBOT_DOWNLOAD", e);
                            }
                        }
                        finally {
                            // No jump statement here: an abrupt exit from finally would discard a pending throwable.
                            if (client != null) {
                                client.clear();
                            }
                        }
                    }
                    // Without a pending redirect the address loop can only run again while
                    // we are unconnected and addresses remain; otherwise nothing can change
                    // any more and the redirect loop has to end instead of spinning.
                    if (!redirect && (connected || !it.hasNext())) {
                        break;
                    }
                }
            }
            String msg;
            if (!connected) {
                bucket.incrRobotsFailCount();
                msg = "Failed to connect ";
            } else if (HTTPCategory.robotsNotInForce(rc)) {
                msg = "Robots.txt was not found at ";
                bucket.zeroRobotsFailCount();
                try {
                    if (bucket.getManager().isPersistent()) {
                        lock.lock();
                        try {
                            RobotsTable.getInstance(cr.getCrawlerId()).updateRobotText(bucket, null);
                        }
                        finally {
                            lock.unlock();
                        }
                    }
                }
                catch (SQLException e) {
                    // Best effort: the table update must not abort the download bookkeeping.
                    if (tracer.isLoggable(Level.FINE)) {
                        tracer.log(Level.FINE, "clearing stored robots.txt", e);
                    }
                }
            } else if (HTTPCategory.successful(rc)) {
                if (truncated) {
                    msg = "Robots.txt was truncated at ";
                    bucket.incrRobotsFailCount();
                } else {
                    msg = "Robots.txt was found at ";
                    bucket.zeroRobotsFailCount();
                }
            } else {
                msg = "Failed to get Robots.txt from ";
                bucket.incrRobotsFailCount();
            }
            if (tracer.isLoggable(Level.FINE)) {
                tracer.log(Level.FINE, msg + orgURL);
            }
            bucket.stopRobotsDownload();
            // Redirects may have re-pointed the record; restore the original URL and bucket.
            cr.setURL(orgURL);
            cr.setBucket(bucket);
        }
        finally {
            if (cr != null) {
                cr.setWriteRDS(false);
                cr.setUpdateURL(false);
                if (bucket.getManager().isPersistent()) {
                    URLSorter.instance().processCrawlResults(cr, lock);
                }
            }
        }
    }

    /**
     * Traces a failed download at FINE level, picking the message from the type
     * of the exception's cause. Does nothing when FINE tracing is disabled.
     *
     * @param url the robots.txt URL that was being fetched
     * @param isa the socket address that was tried
     * @param e   the download failure
     */
    private void logDownloadFail(CURL url, InetSocketAddress isa, DownloadFailedException e) {
        if (!tracer.isLoggable(Level.FINE)) {
            return;
        }
        // instanceof is false for null, so no separate null check is needed.
        // The order matters: ConnectException is a SocketException, and all of these are IOExceptions.
        Throwable cause = e.getCause();
        if (cause instanceof SocketTimeoutException) {
            tracer.log(Level.FINE, "_downloadRobots Timeout " + url + " " + isa);
        } else if (cause instanceof ConnectException) {
            tracer.log(Level.FINE, "_downloadRobots Connection Refused " + url + " " + isa);
        } else if (cause instanceof SocketException) {
            tracer.log(Level.FINE, "_downloadRobots Socket Exception " + url + " " + cause.getMessage());
        } else if (cause instanceof SSLHandshakeException) {
            tracer.log(Level.FINE, "_downloadRobots Failed to make SSL Connection " + url + " " + cause.getMessage());
        } else if (cause instanceof IOException) {
            tracer.log(Level.FINE, "_downloadRobots Failed " + url + " " + cause.getMessage());
        } else {
            tracer.log(Level.FINE, "_downloadRobots", e);
        }
    }
}

