/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.configuration;

import com.ibm.es.nuvo.api.CrawlService;
import com.ibm.es.nuvo.common.ExtendedException;
import com.ibm.es.nuvo.common.Message;
import com.ibm.es.nuvo.configuration.CollectionConfiguration;
import com.ibm.es.nuvo.configuration.CollectionCrawlspaceConfigurationManager;
import com.ibm.es.nuvo.configuration.ConfigurationLock;
import com.ibm.es.nuvo.configuration.ConfigurationManager;
import com.ibm.es.nuvo.configuration.ConfigurationManagerException;
import com.ibm.es.nuvo.configuration.GlobalConfiguration;
import com.ibm.es.nuvo.configuration.crawler.CrawlerConfigurationInterface;
import com.ibm.es.nuvo.configuration.crawler.WebCrawlerConfiguration;
import com.ibm.es.nuvo.crawler.CrawlerMonitor;
import com.ibm.es.nuvo.crawler.URIStatus;
import com.ibm.es.nuvo.crawler.framework.configuration.validator.HistoryEntry;
import com.ibm.es.nuvo.crawler.web.WebCrawler;
import com.ibm.es.nuvo.crawler.web.configuration.CrawlerConfig;
import com.ibm.es.nuvo.crawler.web.configuration.loader.WebCrawlerConfigLoader;
import com.ibm.es.nuvo.crawler.web.configuration.space.CrawlSpaceConfig;
import com.ibm.es.nuvo.crawler.web.configuration.validator.WCValidator;
import com.ibm.es.nuvo.crawlspace.CollectionCrawlspace;
import com.ibm.es.nuvo.crawlspace.HttpBasicAuth;
import com.ibm.es.nuvo.crawlspace.HttpProxySettings;
import com.ibm.es.nuvo.crawlspace.WebCrawlspace;
import com.ibm.es.nuvo.crawlspace.WebCrawlspaceBranch;
import com.ibm.es.nuvo.logging.Loggers;
import com.ibm.es.nuvo.util.Constants;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Vector;
import java.util.logging.Level;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * {@link CrawlService} implementation backed by a single {@link WebCrawler}
 * instance that runs on its own daemon thread.
 *
 * <p>The service is a process-wide singleton obtained through
 * {@link #getCrawlService()}. {@link #initialize()} must be called before
 * {@link #startCrawl(String, boolean)} or any of the {@code validate*} methods
 * are used; {@link #shutdown()} releases the crawler again.
 */
public class WebCrawlServiceImpl
implements CrawlService {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";

    /** Property key under which the per-collection crawl space configuration is passed to the crawler. */
    private static final String KEY_CRAWLSPACE_CONFIG = "crawler.configured.object";
    /** Property key under which the global (default) crawler configuration is passed to the crawler. */
    private static final String KEY_GLOBAL_CONFIG = "crawler.global.configured.object";
    /** Property key for the crawler metadata database location. */
    private static final String KEY_METADATA_DB_PATH = "crawler.metadata.db.path";
    /** Property key that, when present, requests a full re-crawl. */
    private static final String KEY_FORCE_FULL_RECRAWL = "crawler.force.full.recrawl";

    /*
     * Eagerly created: the constructor is empty, so there is nothing to gain from
     * lazy creation, and class initialization guarantees exactly one instance even
     * when several threads call getCrawlService() at the same time.
     */
    private static final WebCrawlServiceImpl crawlService = new WebCrawlServiceImpl();

    private CrawlerConfig globalWebCrawlerConfig = null;
    private WebCrawler webCrawler = null;
    private Thread webCrawlerThread = null;

    /**
     * Returns the singleton web crawl service.
     *
     * @return the shared service instance; never {@code null}
     */
    public static CrawlService getCrawlService() {
        return crawlService;
    }

    private WebCrawlServiceImpl() {
    }

    /**
     * Starts crawling the given crawl space.
     *
     * <p>The crawl space configuration is read and handed to the crawler while
     * holding the lock object returned by {@link ConfigurationLock#getLockObj()}.
     *
     * @param crawlSpaceId id of the crawl space to crawl
     * @param fullRecrawl  when {@code true} the {@code crawler.force.full.recrawl}
     *                     property is set to {@code "true"} for this crawl
     * @throws ExtendedException     propagated from the configuration layer or the crawler
     * @throws IllegalStateException if the service has not been initialized or has
     *                               already been shut down
     */
    @Override
    public void startCrawl(String crawlSpaceId, boolean fullRecrawl) throws ExtendedException {
        // Snapshot the fields once so the null check and the later use see the same objects.
        WebCrawler crawler = this.webCrawler;
        CrawlerConfig globalConfig = this.globalWebCrawlerConfig;
        if (crawler == null || globalConfig == null) {
            // Properties.put rejects null values and the crawler would be dereferenced below;
            // fail with a message that names the actual problem instead of a bare NPE.
            throw new IllegalStateException("Web crawl service is not initialized; call initialize() before startCrawl()");
        }
        Object lockObject = ConfigurationLock.getLockObj();
        String collectionId = CollectionConfiguration.crawlspaceIdToCollectionId(crawlSpaceId);
        synchronized (lockObject) {
            CollectionConfiguration config = ConfigurationManager.getConfigurationManager().getCollection(collectionId);
            CrawlerConfigurationInterface crawlerConfigInterface = WebCrawlerConfiguration.getInstance();
            String collectionDirectory = CollectionConfiguration.buildCollectionPath(collectionId);
            CrawlSpaceConfig crawlSpaceConfig = (CrawlSpaceConfig)crawlerConfigInterface.readCrawlspace(collectionDirectory, collectionId);

            Properties crawlerProperties = new Properties();
            crawlerProperties.put(KEY_CRAWLSPACE_CONFIG, crawlSpaceConfig);
            crawlerProperties.put(KEY_GLOBAL_CONFIG, globalConfig);
            crawlerProperties.put(KEY_METADATA_DB_PATH, config.getCrawlerDataDirectory(Constants.CrawlerType.Web));
            if (fullRecrawl) {
                crawlerProperties.put(KEY_FORCE_FULL_RECRAWL, "true");
            }

            crawler.startCrawl(collectionId, crawlSpaceId, crawlerProperties);
            if (Loggers.logger.isLoggable(Level.INFO)) {
                Message msg = new Message("G0069I.WEB_CRAWLING_STARTED", collectionId);
                Loggers.logger.log(Level.INFO, msg);
            }
        }
    }

    /**
     * Stops crawling the given crawl space. Does nothing when no crawler is
     * available or the crawl space does not exist.
     *
     * @param crawlspaceId id of the crawl space to stop
     * @throws ExtendedException propagated from the crawler
     */
    @Override
    public void stopCrawl(String crawlspaceId) throws ExtendedException {
        WebCrawler crawler = this.webCrawler;
        if (crawler == null || !this.crawlspaceExists(crawlspaceId)) {
            return;
        }
        crawler.stopCrawl(crawlspaceId);
        if (Loggers.logger.isLoggable(Level.INFO)) {
            String collectionId = CollectionConfiguration.crawlspaceIdToCollectionId(crawlspaceId);
            Message msg = new Message("G0070I.WEB_CRAWLING_STOPPED", collectionId);
            Loggers.logger.log(Level.INFO, msg);
        }
    }

    /**
     * Reports whether the given crawl space is currently being crawled.
     *
     * @param crawlspaceId id of the crawl space to query
     * @return {@code true} when the crawler reports the state {@code CRAWLING} or
     *         {@code IDLE} for the crawl space; {@code false} otherwise, including
     *         when no crawler is available or the crawl space does not exist
     * @throws ExtendedException propagated from the crawler
     */
    @Override
    public boolean isCrawling(String crawlspaceId) throws ExtendedException {
        WebCrawler crawler = this.webCrawler;
        if (crawler == null || !this.crawlspaceExists(crawlspaceId)) {
            return false;
        }
        CrawlerMonitor.CrawlerState state = crawler.getCrawlSpaceState(crawlspaceId);
        return CrawlerMonitor.CrawlerState.CRAWLING.equals(state) || CrawlerMonitor.CrawlerState.IDLE.equals(state);
    }

    /**
     * Fetches the URI status report for a crawl space from the crawler.
     *
     * @param crawlSpaceId id of the crawl space
     * @param uriPattern   pattern passed through to the crawler
     * @param maxReturned  limit passed through to the crawler
     * @return the crawler's report, or {@code null} when no crawler is available
     * @throws ExtendedException propagated from the configuration layer or the crawler
     */
    @Override
    public List<URIStatus> getURLStatusReport(String crawlSpaceId, String uriPattern, int maxReturned) throws ExtendedException {
        String collectionId = CollectionConfiguration.crawlspaceIdToCollectionId(crawlSpaceId);
        CollectionConfiguration config = ConfigurationManager.getConfigurationManager().getCollection(collectionId);
        WebCrawler crawler = this.webCrawler;
        if (crawler == null) {
            return null;
        }
        return crawler.getURLStatusReport(crawlSpaceId, uriPattern, config.getCrawlerDataDirectory(Constants.CrawlerType.Web), maxReturned);
    }

    /**
     * Not implemented by the web crawl service.
     *
     * @param crawlspaceId ignored
     * @return always {@code null}
     */
    @Override
    public HashMap<?, ?> getStatus(String crawlspaceId) throws ExtendedException {
        return null;
    }

    /**
     * Not implemented by the web crawl service.
     *
     * @param crawlspaceId ignored
     * @return always {@code null}
     */
    @Override
    public List<String> getForbiddenHosts(String crawlspaceId) throws ExtendedException {
        return null;
    }

    /**
     * Validates a prospective crawl space consisting of a root URL plus allow
     * rules and deny locations, and optionally collects the resulting allow rules.
     *
     * @param crawlspaceId  id of the crawl space the validation is run for
     * @param modifyRules   passed through to the validator
     * @param rootUrl       start URL to validate
     * @param allowRules    allow rules to validate; when {@code null} or empty,
     *                      rules are generated from {@code rootUrl}
     * @param denyLocations deny locations to add to the temporary crawl space
     * @param newAllowRules optional output list (may be {@code null}). When given,
     *                      it receives the effective allow rules, the rules generated
     *                      for every {@code NEW_START_URL} history entry and the value
     *                      of every {@code NEW_PREFIX} entry, with duplicates removed
     *                      (first occurrence kept)
     * @return the validation history. When {@code newAllowRules} is given and at
     *         least one {@code NEW_START_URL} entry was seen, all
     *         {@code NO_LINK_FOUND} entries are removed from it
     * @throws ExtendedException     propagated from the configuration layer
     * @throws IllegalStateException if the service has not been initialized
     */
    @Override
    public List<HistoryEntry<WCValidator.Type>> validate(String crawlspaceId, boolean modifyRules, String rootUrl, List<String> allowRules, List<String> denyLocations, List<String> newAllowRules) throws ExtendedException {
        CrawlSpaceConfig crawlSpaceConfig = this.createNewCrawlspace(crawlspaceId, true);
        if (allowRules == null || allowRules.isEmpty()) {
            allowRules = WebCrawlspaceBranch.generateAllowRulesFor(rootUrl);
        }
        CollectionCrawlspaceConfigurationManager.addDataToWebCrawlspace(crawlSpaceConfig, rootUrl, allowRules, denyLocations);
        List<HistoryEntry<WCValidator.Type>> history = this.doValidation(crawlSpaceConfig, modifyRules);
        if (newAllowRules == null) {
            // Caller is not interested in the derived rules; the history is returned untouched.
            return history;
        }

        newAllowRules.addAll(allowRules);

        // Fold the validator's findings into the output rules.
        boolean foundRedirect = false;
        for (HistoryEntry<WCValidator.Type> entry : history) {
            if (WCValidator.Type.NEW_START_URL.equals(entry.getType())) {
                foundRedirect = true;
                newAllowRules.addAll(WebCrawlspaceBranch.generateAllowRulesFor(entry.getValue()));
            } else if (WCValidator.Type.NEW_PREFIX.equals(entry.getType())) {
                String newPrefixRule = entry.getValue();
                if (!newAllowRules.contains(newPrefixRule)) {
                    newAllowRules.add(newPrefixRule);
                }
            }
        }

        // Once a new start URL was reported, NO_LINK_FOUND entries are dropped from the history.
        // Iterate backwards so removals do not shift the indices still to be visited.
        if (foundRedirect) {
            for (int i = history.size() - 1; i >= 0; --i) {
                if (WCValidator.Type.NO_LINK_FOUND.equals(history.get(i).getType())) {
                    history.remove(i);
                }
            }
        }

        // Remove duplicates in place, keeping the first occurrence of every rule.
        for (int i = newAllowRules.size() - 1; i >= 0; --i) {
            if (newAllowRules.indexOf(newAllowRules.get(i)) < i) {
                newAllowRules.remove(i);
            }
        }
        return history;
    }

    /**
     * Validates a URL through the given HTTP proxy settings. Any proxy already
     * configured for the collection is not applied; only the supplied one is.
     *
     * @param crawlspaceId id of the crawl space the validation is run for
     * @param modifyRules  passed through to the validator
     * @param url          URL to validate
     * @param domain       proxy domain name
     * @param host         proxy host name
     * @param port         proxy port
     * @param userid       proxy user id
     * @param pwd          proxy password
     * @return the validation history
     * @throws ExtendedException     propagated from the configuration layer
     * @throws IllegalStateException if the service has not been initialized
     */
    public List<HistoryEntry<WCValidator.Type>> validateProxy(String crawlspaceId, boolean modifyRules, String url, String domain, String host, int port, String userid, String pwd) throws ExtendedException {
        CrawlSpaceConfig crawlSpaceConfig = this.createNewCrawlspace(crawlspaceId, false);
        String newUrl = CollectionCrawlspaceConfigurationManager.generateStartURLForWebCrawlspace(url);
        crawlSpaceConfig.addStartURL(newUrl);

        HttpProxySettings proxy = new HttpProxySettings();
        proxy.setDomainName(domain);
        proxy.setHostName(host);
        proxy.setPort(port);
        proxy.setUserId(userid);
        proxy.setPassword(pwd);
        CollectionCrawlspaceConfigurationManager.addProxyToWebCrawlspace(crawlSpaceConfig, proxy);

        return this.doValidation(crawlSpaceConfig, modifyRules);
    }

    /**
     * Validates a URL using HTTP basic authentication credentials. A proxy
     * configured for the collection, if any, is applied as well.
     *
     * @param crawlspaceId id of the crawl space the validation is run for
     * @param modifyRules  passed through to the validator
     * @param url          URL to validate
     * @param userid       basic-auth user id
     * @param pwd          basic-auth password
     * @return the validation history
     * @throws ExtendedException     propagated from the configuration layer
     * @throws IllegalStateException if the service has not been initialized
     */
    public List<HistoryEntry<WCValidator.Type>> validateBasicAuth(String crawlspaceId, boolean modifyRules, String url, String userid, String pwd) throws ExtendedException {
        CrawlSpaceConfig crawlSpaceConfig = this.createNewCrawlspace(crawlspaceId, true);
        String newUrl = CollectionCrawlspaceConfigurationManager.generateStartURLForWebCrawlspace(url);
        crawlSpaceConfig.addStartURL(newUrl);

        HttpBasicAuth auth = new HttpBasicAuth();
        auth.setUserId(userid);
        auth.setPassword(pwd);
        String authUrl = CollectionCrawlspaceConfigurationManager.trimURLBackToSeparator(newUrl);
        CollectionCrawlspaceConfigurationManager.addBasicAuthToWebCrawlspace(crawlSpaceConfig, auth, authUrl);

        return this.doValidation(crawlSpaceConfig, modifyRules);
    }

    /**
     * Creates a default crawl space configuration for the collection that owns
     * {@code crawlspaceId} and copies the agent information (crawler name and
     * e-mail id) and, optionally, the HTTP proxy from the collection's stored web
     * crawl space when one can be read.
     *
     * @param crawlspaceId      id of the crawl space
     * @param addProxyIfPresent whether a stored HTTP proxy should be copied
     * @return the new crawl space configuration
     * @throws ExtendedException propagated from the configuration layer
     */
    private CrawlSpaceConfig createNewCrawlspace(String crawlspaceId, boolean addProxyIfPresent) throws ExtendedException {
        String collectionId = CollectionConfiguration.crawlspaceIdToCollectionId(crawlspaceId);
        CrawlerConfigurationInterface crawlerConfigInterface = WebCrawlerConfiguration.getInstance();
        CrawlSpaceConfig crawlSpaceConfig = (CrawlSpaceConfig)crawlerConfigInterface.createDefaultCrawlspace(collectionId);

        CollectionCrawlspace collCS = null;
        try {
            collCS = CollectionCrawlspaceConfigurationManager.getConfigurationManager().getCrawlspace(collectionId);
        }
        catch (ConfigurationManagerException ignored) {
            // Best effort: when the stored crawl space cannot be read, continue with the
            // plain defaults instead of failing the validation.
        }
        if (collCS == null) {
            return crawlSpaceConfig;
        }
        WebCrawlspace webCS = collCS.getWebCrawlspace();
        if (webCS == null) {
            return crawlSpaceConfig;
        }

        CollectionCrawlspaceConfigurationManager.addAgentInfoToWebCrawlspace(crawlSpaceConfig, webCS.getCrawlerName(), webCS.getEmailId());
        if (addProxyIfPresent) {
            HttpProxySettings proxy = webCS.getHttpProxy();
            if (proxy != null) {
                CollectionCrawlspaceConfigurationManager.addProxyToWebCrawlspace(crawlSpaceConfig, proxy);
            }
        }
        return crawlSpaceConfig;
    }

    /**
     * Runs the web crawler validator against the given crawl space configuration
     * together with the global crawler configuration.
     *
     * @param crawlSpaceConfig crawl space configuration to validate
     * @param modifyRules      passed through to {@code WCValidator.validate}
     * @return the history recorded by the validator
     * @throws IllegalStateException if the global crawler configuration has not
     *                               been loaded by {@link #initialize()}
     */
    private List<HistoryEntry<WCValidator.Type>> doValidation(CrawlSpaceConfig crawlSpaceConfig, boolean modifyRules) {
        CrawlerConfig globalConfig = this.globalWebCrawlerConfig;
        if (globalConfig == null) {
            // Properties.put would otherwise fail with an uninformative NullPointerException.
            throw new IllegalStateException("Web crawl service is not initialized; call initialize() before validating");
        }
        Properties crawlerProperties = new Properties();
        crawlerProperties.put(KEY_CRAWLSPACE_CONFIG, crawlSpaceConfig);
        crawlerProperties.put(KEY_GLOBAL_CONFIG, globalConfig);
        WCValidator validator = new WCValidator(crawlerProperties);
        validator.validate(modifyRules);
        return validator.getHistory();
    }

    /**
     * Reports whether a web crawl space exists for the collection that owns the
     * given crawl space id.
     *
     * @param crawlspaceId id of the crawl space
     * @return the result of {@link #crawlspaceExistsForCollection(String)} for the
     *         owning collection
     */
    @Override
    public boolean crawlspaceExists(String crawlspaceId) {
        String collectionId = CollectionConfiguration.crawlspaceIdToCollectionId(crawlspaceId);
        return this.crawlspaceExistsForCollection(collectionId);
    }

    /**
     * Reports whether a web crawl space exists in the given collection's directory.
     *
     * @param collectionId id of the collection
     * @return whatever the web crawler configuration reports for the collection path
     */
    @Override
    public boolean crawlspaceExistsForCollection(String collectionId) {
        CrawlerConfigurationInterface crawlerConfigInterface = WebCrawlerConfiguration.getInstance();
        String collectionDirectory = CollectionConfiguration.buildCollectionPath(collectionId);
        return crawlerConfigInterface.crawlspaceExists(collectionDirectory);
    }

    /**
     * Loads the default web crawler configuration ({@code web_crawler.xml} from the
     * {@code defaults} configuration area), creates and configures the
     * {@link WebCrawler} and starts it on a daemon thread named {@code WebCrawler}.
     *
     * <p>The crawler and its thread are published to this service only after the
     * crawler has been configured successfully, so a failing {@code applyConfig}
     * never leaves a half-configured crawler behind. Calling this method again
     * replaces the stored crawler without shutting the previous one down.
     *
     * @throws FileNotFoundException if the default configuration file cannot be opened
     * @throws ExtendedException     propagated from the loader or the crawler
     */
    @Override
    public void initialize() throws FileNotFoundException, ExtendedException {
        String defaultWebCrawlerConfigFileName = GlobalConfiguration.getConfiguration().getConfigFileName("defaults", "web_crawler.xml");
        FileInputStream configFile = new FileInputStream(defaultWebCrawlerConfigFileName);
        try {
            this.globalWebCrawlerConfig = (CrawlerConfig)WebCrawlerConfigLoader.instance().load(null, configFile);

            Properties webCrawlerGlobalProperties = new Properties();
            webCrawlerGlobalProperties.put(KEY_GLOBAL_CONFIG, this.globalWebCrawlerConfig);
            String dbPath = GlobalConfiguration.getConfiguration().getConfigPath() + File.separator + "cloudscape";
            webCrawlerGlobalProperties.setProperty(KEY_METADATA_DB_PATH, dbPath);

            // Build and configure in locals first; only a fully configured crawler is published.
            WebCrawler crawler = new WebCrawler();
            crawler.applyConfig(null, webCrawlerGlobalProperties);
            Thread crawlerThread = new Thread((Runnable)crawler, "WebCrawler");
            crawlerThread.setDaemon(true);

            this.webCrawler = crawler;
            this.webCrawlerThread = crawlerThread;
            crawlerThread.start();
        }
        finally {
            try {
                configFile.close();
            }
            catch (IOException ignored) {
                // Nothing useful can be done about a failed close of a read-only stream.
            }
        }
    }

    /**
     * Shuts the crawler down, if one is present, and clears the references to the
     * crawler and its thread. When the crawler's own shutdown throws, the
     * references are left in place.
     *
     * @throws ExtendedException propagated from the crawler
     */
    @Override
    public void shutdown() throws ExtendedException {
        WebCrawler crawler = this.webCrawler;
        if (crawler != null) {
            crawler.shutdown();
            this.webCrawlerThread = null;
        }
        this.webCrawler = null;
    }

    /**
     * Returns the underlying crawler.
     *
     * @return the {@link WebCrawler} instance, or {@code null} before
     *         {@link #initialize()} succeeded or after {@link #shutdown()}
     */
    @Override
    public Object getCrawler() {
        return this.webCrawler;
    }

    /**
     * Returns the crawler type served by this implementation.
     *
     * @return {@code Constants.CrawlerType.Web}
     */
    @Override
    public Constants.CrawlerType getType() {
        return Constants.CrawlerType.Web;
    }
}

