/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.crawler.web.http;

import com.ibm.es.nuvo.GlobalSystem;
import com.ibm.es.nuvo.crawler.util.hash.HashableDataBufferOutputStream;
import com.ibm.es.nuvo.crawler.web.archive.ArchiveEntryProcessor;
import com.ibm.es.nuvo.crawler.web.bucket.Bucket;
import com.ibm.es.nuvo.crawler.web.bucket.BucketManager;
import com.ibm.es.nuvo.crawler.web.configuration.space.CrawlSpaceConfig;
import com.ibm.es.nuvo.crawler.web.db.CrawlRec;
import com.ibm.es.nuvo.crawler.web.db.InsertRequest;
import com.ibm.es.nuvo.crawler.web.db.InsertRequestSet;
import com.ibm.es.nuvo.crawler.web.db.URLInserter;
import com.ibm.es.nuvo.crawler.web.db.URLUpdater;
import com.ibm.es.nuvo.crawler.web.db.UpdateRequest;
import com.ibm.es.nuvo.crawler.web.db.tables.ArchiveEntryTable;
import com.ibm.es.nuvo.crawler.web.error.GenericException;
import com.ibm.es.nuvo.crawler.web.error.WCException;
import com.ibm.es.nuvo.crawler.web.http.HTTPCategory;
import com.ibm.es.nuvo.crawler.web.net.CURL;
import com.ibm.es.nuvo.crawler.web.plugin.PluginHandler;
import com.ibm.es.nuvo.crawler.web.plugin.WebCrawlerPlugin;
import com.ibm.es.nuvo.crawler.web.rule.WebSpace;
import com.ibm.es.nuvo.crawler.web.storage.DebugDocumenQueue;
import com.ibm.es.nuvo.crawler.web.storage.TeeDocumentQueue;
import com.ibm.es.nuvo.crawler.web.storage.WebWriter;
import com.ibm.es.nuvo.crawler.web.storage.WebWriterImpl;
import com.ibm.es.nuvo.crawler.web.util.Strings;
import com.ibm.es.nuvo.crawler.web.util.UnixTime;
import com.ibm.es.nuvo.documentqueue.DocumentQueueInterface;
import com.ibm.es.nuvo.logging.ExtendedLogger;
import com.ibm.supa.config.ConfigurationLoader;
import com.ibm.supa.web.WebConfig;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Singleton that post-processes the result of crawling one URL.
 *
 * <p>For each {@link CrawlRec} it (1) runs the configured crawler plugins,
 * (2) writes the fetched content through a {@link WebWriter} (expanding
 * archives into their entries), (3) enqueues the discovered out-links with
 * the {@link URLInserter}, and (4) enqueues an update of the URL's crawl
 * metadata with the {@link URLUpdater}.
 *
 * <p>The singleton is created eagerly during class initialisation; the
 * constructor opens the underlying writer.
 */
public class URLSorter {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    private static final ExtendedLogger tracer = ExtendedLogger.getLogger("NuvoTracer." + URLSorter.class.getName());
    public static final String s_agent = "URLSorter";
    /** Sink for crawled content; assigned once in the constructor. */
    private final WebWriter webWriter;
    // Must stay below 'tracer': static initialisers run in textual order and
    // the constructor's callees may log.
    private static final URLSorter s_instance = new URLSorter();

    /**
     * Builds the document-queue chain and opens the writer.
     *
     * <p>System property {@code web.dump.doc=true} selects the debug queue
     * instead of the global one; system property {@code web.copy.dir}, when
     * set, wraps the chosen queue in a {@link TeeDocumentQueue} targeting
     * that directory.
     */
    private URLSorter() {
        DocumentQueueInterface documentQueue;
        if (Boolean.getBoolean("web.dump.doc")) {
            documentQueue = DebugDocumenQueue.instance();
        } else {
            documentQueue = GlobalSystem.getSingleInstance().getDocumentQueue();
        }
        String teeDir = System.getProperty("web.copy.dir");
        if (teeDir != null) {
            documentQueue = new TeeDocumentQueue(10, documentQueue, new File(teeDir));
        }
        this.webWriter = new WebWriterImpl(documentQueue);
        this.webWriter.open();
    }

    /**
     * Processes one crawl result: plugins, content write, link insertion and
     * URL metadata update.
     *
     * <p>The crawl-space configuration is re-checked with {@code isValid()}
     * between the stages; as soon as it reports invalid, processing stops
     * silently. Plugin processing happens outside the error handler, so a
     * failure there propagates to the caller. Any failure in the later stages
     * is logged at FINE (with the causing throwable attached) and otherwise
     * suppressed, so a single bad record does not abort the caller.
     *
     * @param cr   the crawl record to process; must not be {@code null}
     * @param lock held while enqueuing link inserts and URL updates
     */
    public void processCrawlResults(CrawlRec cr, Lock lock) {
        if (!cr.getConfig().isValid()) {
            return;
        }
        List<WebCrawlerPlugin> plugins = cr.getConfig().getAgent().getPlugins();
        if (plugins != null && !plugins.isEmpty() && cr.getNewHTTPStatus() == 200) {
            PluginHandler.getInstance().process(cr, plugins);
        }
        try {
            if (tracer.isLoggable(Level.FINER)) {
                tracer.entering(this.getClass().getName(), "processCrawlResults(CrawlRec)", new Object[]{cr.toString()});
            }

            // Stage 1: persist the content, or release the buffer if it is not wanted.
            if (cr.doWriteRDS()) {
                if (!this.writeContent(cr)) {
                    if (tracer.isLoggable(Level.FINE)) {
                        tracer.fine("failed to write " + cr.toString());
                    }
                    return;
                }
            } else {
                HashableDataBufferOutputStream content = cr.getContent();
                if (content != null) {
                    content.dispose();
                }
            }
            if (!cr.getConfig().isValid()) {
                return;
            }

            // Stage 2: follow links unless the page is marked no-follow.
            if (!cr.isNoFollow()) {
                if (!cr.getConfig().isValid()) {
                    return;
                }
                String collectionId = cr.getConfig().getCollectionId();
                WebConfig webConfig = ConfigurationLoader.getAnalysisScopeConfig(collectionId).getWebConfig();
                boolean atMaxLinkDepth = webConfig != null
                        && webConfig.getMaxLinkDepth() > 0
                        && cr.getDepth() >= webConfig.getMaxLinkDepth();
                if (atMaxLinkDepth) {
                    // Depth limit reached: record the URL's own state but do not
                    // enqueue any of its out-links.
                    if (cr.getConfig().isValid() && cr.doUpdateURL()) {
                        this.writeUrlLocked(cr, lock);
                    }
                    return;
                }
                List<CURL> list = cr.getOutLinks();
                CURL redu = cr.getRedirectURL();
                if (list != null || redu != null) {
                    if (list == null) {
                        list = new ArrayList<CURL>();
                    }
                    if (redu != null) {
                        // The redirect target is treated as one more out-link.
                        list.add(redu);
                    }
                    lock.lock();
                    try {
                        this.insert(list, cr.getURL().getURLHash().longValue(), cr.getBucket().getManager(), cr.getConfig().getMaxPathDepth(), cr.getCrawlerId(), cr.getDepth() + 1);
                    } finally {
                        lock.unlock();
                    }
                }
            }

            // Stage 3: record the URL's new crawl metadata.
            if (cr.getConfig().isValid() && cr.doUpdateURL()) {
                this.writeUrlLocked(cr, lock);
            }
        } catch (Throwable e) {
            // Deliberately best-effort: report and carry on. The throwable is
            // attached so the cause is not lost.
            if (tracer.isLoggable(Level.FINE)) {
                tracer.log(Level.FINE, "The document " + describe(cr) + " was crawled, but its metadata was not updated.", e);
            }
        }
    }

    /** Enqueues the URL metadata update for {@code cr} while holding {@code lock}. */
    private void writeUrlLocked(CrawlRec cr, Lock lock) throws WCException {
        lock.lock();
        try {
            this._writeURL(cr);
        } finally {
            lock.unlock();
        }
    }

    /**
     * Returns a printable identity for {@code cr} for log messages: the URL's
     * downstream string, or the record's own string form when it has no URL.
     */
    private static String describe(CrawlRec cr) {
        CURL url = cr.getURL();
        return url != null ? url.downstreamString() : String.valueOf(cr);
    }

    /**
     * Writes the content of {@code cr} through the {@link WebWriter}.
     *
     * <p>When the record is not truncated and is recognised as an archive,
     * each non-archive entry produced by the {@link ArchiveEntryProcessor} is
     * written recursively instead of the archive itself. If archive handling
     * throws, the record is written as a regular document.
     *
     * @param cr the record to write; {@code null} yields {@code false}
     * @return {@code false} if the record is {@code null}, has HTTP status 0,
     *         has no URL, or a write failed; {@code true} otherwise
     */
    private boolean writeContent(CrawlRec cr) {
        if (tracer.isLoggable(Level.FINER)) {
            tracer.entering(this.getClass().getName(), "_writeRDS", new Object[]{cr});
        }
        if (cr == null) {
            return false;
        }
        short httpStatus = cr.getNewHTTPStatus();
        if (httpStatus == 0) {
            if (tracer.isLoggable(Level.FINER)) {
                tracer.finer("Null HTTP status for " + cr);
            }
            return false;
        }
        // Prefer the downstream URL, falling back to the record's own URL.
        CURL durl = cr.getDownstreamURL();
        if (durl == null) {
            durl = cr.getURL();
        }
        if (durl == null) {
            return false;
        }
        if (cr.isTruncated() || !ArchiveEntryProcessor.isArchive(durl, cr.getContentTypeLC())) {
            return this.webWriter.write(cr);
        }

        HashableDataBufferOutputStream content = cr.getContent();
        try {
            ArchiveEntryTable archiveTable = ArchiveEntryTable.getInstance(cr.getCrawlerId());
            if (archiveTable != null) {
                ArchiveEntryProcessor processor = archiveTable.getProcessor(durl, cr, cr.getWebSpace(), cr.getConfig().getCodepage());
                CrawlRec rec;
                while ((rec = processor.next()) != null) {
                    // Nested archives are skipped rather than expanded.
                    if (ArchiveEntryProcessor.isArchive(rec.getURL(), rec.getContentTypeLC())) {
                        continue;
                    }
                    if (!this.writeContent(rec)) {
                        // NOTE(review): the archive's content buffer is not
                        // disposed on this path - confirm whether the caller
                        // is expected to release it.
                        return false;
                    }
                }
                if (content != null) {
                    content.dispose();
                }
            }
            // NOTE(review): when no archive table exists nothing is written
            // and the buffer is not disposed, yet success is reported -
            // confirm this is intended.
            return true;
        } catch (Exception e) {
            // Archive handling failed: fall back to writing the record as an
            // ordinary document, keeping the cause in the trace.
            if (tracer.isLoggable(Level.FINE)) {
                tracer.log(Level.FINE, "Not an archive " + describe(cr), e);
            }
            return this.webWriter.write(cr);
        }
    }

    /**
     * Enqueues an {@link UpdateRequest} carrying the new crawl metadata of
     * {@code cr} with the {@link URLUpdater}.
     *
     * @param cr the crawled record
     * @throws WCException wrapping any exception raised while building or
     *         enqueuing the request
     */
    private void _writeURL(CrawlRec cr) throws WCException {
        if (tracer.isLoggable(Level.FINER)) {
            tracer.entering(this.getClass().getName(), "_writeURL(CrawlRec)", new Object[]{cr.toString()});
        }
        try {
            short newHTTPStatus = cr.getNewHTTPStatus();
            // Count consecutive identical statuses; a changed status restarts at 1.
            int httpStatusCount = newHTTPStatus == cr.getOldHTTPStatus() ? cr.getHTTPStatusCount() + 1 : 1;

            // Bit flags: 4 = RSS, 2 = no-follow, 1 = no-index.
            int flags = 0;
            if (cr.isRss()) {
                flags |= 4;
            }
            if (cr.isNoFollow()) {
                flags |= 2;
            }
            if (cr.isNoIndex()) {
                flags |= 1;
            }

            URLUpdater.instance().enqueueRequest(new UpdateRequest(cr.getURL().getURLHash().longValue(), newHTTPStatus, httpStatusCount, cr.getCrawlDate(), this._calculateRecrawlDate(cr), cr.getServerDate(), cr.getExpirationDate(), cr.getLastModifiedDate(), cr.getNewFingerprint(), flags, cr.getCrawlerId(), cr.getConfig()));
        } catch (Exception e) {
            GenericException wce = new GenericException("URLSorter._writeURL" + WCException.stackTraceFor(e), (Throwable)e);
            throw wce;
        }
    }

    /**
     * Computes the time at which {@code cr} should next be crawled.
     *
     * <p>RSS records are always rescheduled 3600 seconds from now. Otherwise
     * the previous interval is scaled according to the crawl outcome and then
     * clamped to the configured minimum and maximum recrawl intervals.
     *
     * @param cr the crawled record
     * @return the recrawl time as {@code UnixTime.now()} plus the chosen
     *         interval, saturated at {@link Integer#MAX_VALUE}
     */
    private int _calculateRecrawlDate(CrawlRec cr) {
        if (cr.isRss()) {
            return timeFromNow(3600);
        }
        int oldInterval = cr.getCrawlIntervalSec();
        assert (cr.getBucket() != null);
        CrawlSpaceConfig config = cr.getConfig();
        assert (config != null);
        int maxRecrawlInterval = config.getMaxRecrawlInterval();
        int minRecrawlInterval = config.getMinRecrawlInterval();
        if (oldInterval <= 0) {
            // No usable previous interval: start from half the configured range.
            // NOTE(review): this is half the range width, not the midpoint
            // (min + (max - min) / 2) - confirm which was intended.
            oldInterval = (maxRecrawlInterval - minRecrawlInterval) / 2;
        }

        short newStatus = cr.getNewHTTPStatus();
        double factor;
        if (HTTPCategory.tempUnavailable(newStatus)) {
            // Repeated temporary failure: lengthen the interval; first
            // occurrence: shorten it.
            factor = newStatus == cr.getOldHTTPStatus() ? 1.7 : 0.8;
        } else if (HTTPCategory.ruledOut(newStatus)) {
            // Scaled far past the maximum so the clamp below yields exactly
            // maxRecrawlInterval, as the original direct assignment did.
            factor = Double.POSITIVE_INFINITY;
        } else if (cr.getOldFingerprint() != 0 && cr.getOldFingerprint() == cr.getNewFingerprint()) {
            // NOTE(review): an unchanged fingerprint shortens the interval
            // and a changed one doubles it; this looks inverted for adaptive
            // recrawling - confirm before relying on it.
            factor = 0.8;
        } else {
            factor = 2.0;
        }

        int newInterval;
        if (Double.isInfinite(factor)) {
            newInterval = maxRecrawlInterval;
        } else {
            // Narrowing a double to int saturates, so this cannot wrap.
            newInterval = (int)(factor * (double)oldInterval);
        }
        if (newInterval < minRecrawlInterval) {
            newInterval = minRecrawlInterval;
        }
        if (newInterval > maxRecrawlInterval) {
            newInterval = maxRecrawlInterval;
        }
        return timeFromNow(newInterval);
    }

    /**
     * Returns {@code UnixTime.now() + seconds}, computed in 64-bit arithmetic
     * and saturated at {@link Integer#MAX_VALUE} so a large interval cannot
     * wrap around to a negative (past) timestamp.
     */
    private static int timeFromNow(int seconds) {
        long due = (long)UnixTime.now() + (long)seconds;
        return (int)Math.min(due, (long)Integer.MAX_VALUE);
    }

    /**
     * Filters {@code urls} and enqueues the survivors as one
     * {@link InsertRequestSet} with the {@link URLInserter}.
     *
     * <p>Invalid URLs and URLs rejected by {@code isInsertable} are skipped.
     * Nothing is enqueued when no URL survives.
     *
     * @param urls              candidate URLs; {@code null} or empty is a no-op
     * @param discovererURLHash hash of the URL on which the links were found
     * @param manager           bucket manager supplying the web space and buckets
     * @param maxPathDepth      path-depth limit passed to {@code Strings.nInstances}
     * @param id                crawler id stored on each insert request
     * @param newDepth          link depth stored on each insert request
     */
    public void insert(List<CURL> urls, long discovererURLHash, BucketManager manager, int maxPathDepth, String id, int newDepth) {
        if (tracer.isLoggable(Level.FINER)) {
            tracer.entering(this.getClass().getName(), "insert(LinkedList,long)", new Object[]{urls, discovererURLHash});
        }
        if (urls == null || urls.isEmpty()) {
            if (tracer.isLoggable(Level.FINER)) {
                tracer.exiting(this.getClass().getName(), "insert(LinkedList,long)");
            }
            return;
        }
        ArrayList<InsertRequest> list = new ArrayList<InsertRequest>(urls.size());
        for (CURL url : urls) {
            if (!url.isValid()) {
                if (tracer.isLoggable(Level.FINER)) {
                    tracer.finer("Discarding invalid URL, reason=" + url.getStatus() + ": " + url.badString());
                }
                continue;
            }
            if (!this.isInsertable(url, manager, maxPathDepth)) {
                continue;
            }
            list.add(new InsertRequest(url.getHostHash().longValue(), url.getURLHash().longValue(), discovererURLHash, UnixTime.now(), url.downstreamString(), id, newDepth));
        }
        if (list.size() > 0) {
            URLInserter.instance().enqueueRequest(new InsertRequestSet(list));
        }
        if (tracer.isLoggable(Level.FINER)) {
            tracer.exiting(this.getClass().getName(), "insert(LinkedList,long)");
        }
    }

    /**
     * Decides whether a valid URL may be enqueued for crawling.
     *
     * <p>The checks run in this order and stop at the first rejection: path
     * depth, web-space allow rule, web-space exclude rule, existence of a
     * bucket for the URL, that bucket's verification, and - when the URL has
     * a context with its own bucket - that bucket's verification as well.
     */
    private boolean isInsertable(CURL url, BucketManager manager, int maxPathDepth) {
        if (Strings.nInstances(url.getPath(), '/', maxPathDepth) < 0) {
            return false;
        }
        WebSpace webSpace = manager.getConfig().getWebSpace();
        if (!webSpace.isAllowed(url) || webSpace.isExcluded(url)) {
            return false;
        }
        Bucket bucket = manager.bucketForURL(url);
        if (bucket == null || bucket.verifyURL(url) != 0) {
            return false;
        }
        if (url.getContext() != null) {
            Bucket contextBucket = manager.bucketForURL(url.getContext());
            if (contextBucket != null && contextBucket.verifyURL(url) != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the process-wide {@code URLSorter} instance.
     *
     * @return the singleton created during class initialisation
     */
    public static URLSorter instance() {
        return s_instance;
    }

    /**
     * Delegates to the underlying {@link WebWriter} to append its snapshot
     * to {@code builder}.
     *
     * @param builder receives the writer's snapshot text
     */
    public void takeSnapshot(StringBuilder builder) {
        this.webWriter.takeSnapshot(builder);
    }

    /**
     * Convenience overload of {@link #processCrawlResults(CrawlRec, Lock)}
     * for callers that have no lock of their own.
     *
     * <p>A fresh {@link ReentrantLock} is created for every call, so this
     * overload does not serialise against any other caller.
     *
     * @param cr the crawl record to process; must not be {@code null}
     */
    public void processCrawlResults(CrawlRec cr) {
        this.processCrawlResults(cr, new ReentrantLock());
    }
}

