/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.crawler.web.configuration.validator;

import com.ibm.es.nuvo.crawler.web.bucket.Bucket;
import com.ibm.es.nuvo.crawler.web.bucket.TmpBucketManager;
import com.ibm.es.nuvo.crawler.web.configuration.validator.ValidatorBase;
import com.ibm.es.nuvo.crawler.web.configuration.validator.WCValidator;
import com.ibm.es.nuvo.crawler.web.db.CrawlRec;
import com.ibm.es.nuvo.crawler.web.error.WCException;
import com.ibm.es.nuvo.crawler.web.http.Client;
import com.ibm.es.nuvo.crawler.web.http.HTTPCategory;
import com.ibm.es.nuvo.crawler.web.net.CURL;
import com.ibm.es.nuvo.crawler.web.parser.doc.HTMLParsedDocument;
import com.ibm.es.nuvo.crawler.web.parser.doc.Link;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.AttributeEntry;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.AttributeHandler;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.HtmlStreamParser;
import com.ibm.es.nuvo.crawler.web.parser.javascript.JavaScriptHandler;
import com.ibm.es.nuvo.crawler.web.util.StreamUtils;
import com.ibm.es.nuvo.logging.ExtendedLogger;
import com.ibm.es.nuvo.util.databuffer.DataBufferException;
import java.io.CharConversionException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.Properties;
import java.util.logging.Level;

public class StartURLValidator
extends ValidatorBase {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    private static final List<String> NEED_LIST = Arrays.asList("realm");
    private static final ExtendedLogger tracer = ExtendedLogger.getLogger("NuvoTracer." + StartURLValidator.class.getName());

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    void validate(WCValidator validator) {
        List<String> startURLs = validator.getStartURLs();
        TmpBucketManager manager = validator.getManager();
        ArrayList<String> newSeeds = new ArrayList<String>();
        ListIterator<String> itr = startURLs.listIterator();
        while (itr.hasNext()) {
            String url = itr.next();
            CURL curl = new CURL(url);
            Bucket bucket = manager.bucketForURL(curl);
            if (bucket == null) continue;
            CrawlRec rec = new CrawlRec();
            rec.setURL(curl);
            rec.setBucket(bucket);
            Client client = null;
            try {
                client = new Client(rec);
                client.download();
                rec.setRedirectURL(client.getRedirectURL());
                int returnCode = client.getHTTPReturnCode();
                String enc = client.getContentTypeAndEncoding()[1];
                if (enc == null) {
                    enc = validator.getSpace().getCodepage();
                }
                if (enc == null) {
                    enc = "ISO8859-1";
                }
                if (client.getContent() != null) {
                    HTMLParsedDocument pd;
                    block41: {
                        InputStream is = null;
                        pd = null;
                        try {
                            is = client.getContent().getInputStream();
                            if (is == null) {
                                System.out.println(curl);
                                break block41;
                            }
                            try {
                                pd = HtmlStreamParser.parse(is, curl.toString(), enc);
                            }
                            catch (CharConversionException e) {
                                StreamUtils.safeClose(is);
                                is = client.getContent().getInputStream();
                                try {
                                    pd = HtmlStreamParser.parse(is, curl.toString(), "iso8859-1");
                                }
                                catch (CharConversionException ignored) {
                                    // empty catch block
                                }
                            }
                        }
                        catch (DataBufferException ignored) {
                        }
                        finally {
                            StreamUtils.safeClose(is);
                        }
                    }
                    if (pd != null) {
                        String redirectURL = pd.getRedirectURL();
                        CURL rcurl = null;
                        if (redirectURL != null) {
                            rcurl = new CURL(redirectURL);
                            if (!rcurl.isValid()) {
                                rcurl = new CURL(curl, redirectURL);
                            }
                            if (HTTPCategory.successful(returnCode) && rcurl.isValid() && !rcurl.toString().equals(curl.toString())) {
                                returnCode = 3020;
                                rec.setRedirectURL(rcurl);
                            }
                        }
                        if (HTTPCategory.successful(returnCode)) {
                            boolean hasLink;
                            block42: {
                                Bucket b;
                                hasLink = false;
                                long hash = curl.getURLHash().longValue();
                                if (pd.frames() != null && pd.frames().size() > 0) {
                                    hasLink = true;
                                } else if (pd.getLinks() != null) {
                                    for (Link link : pd.getLinks()) {
                                        CURL cu = new CURL(link.getRelativeAddress());
                                        if (!cu.isValid()) {
                                            cu = new CURL(curl, link.getRelativeAddress());
                                        }
                                        if (!cu.isValid()) continue;
                                        b = manager.bucketForURL(cu);
                                        if (!cu.isValid() || b == null || b.verifyURL(cu) != 0 || cu.getURLHash().longValue() == hash) continue;
                                        hasLink = true;
                                        break;
                                    }
                                }
                                if (!hasLink) {
                                    try {
                                        ArrayList<String> jsOutLinks = JavaScriptHandler.getLinks(curl, curl, client.getContent(), enc);
                                        if (jsOutLinks == null || jsOutLinks.size() <= 0) break block42;
                                        rec.setOutLinks(jsOutLinks);
                                        for (CURL c : rec.getOutLinks()) {
                                            if (!c.isValid()) continue;
                                            b = manager.bucketForURL(c);
                                            if (!c.isValid() || b == null || b.verifyURL(c) != 0 || c.getURLHash().longValue() == hash) continue;
                                            hasLink = true;
                                            break;
                                        }
                                    }
                                    catch (Throwable ignored) {
                                        // empty catch block
                                    }
                                }
                            }
                            if (!hasLink) {
                                validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.NO_LINK_FOUND);
                            }
                        }
                    }
                    pd = null;
                }
                client.clear();
                if (HTTPCategory.successful(returnCode)) continue;
                if (401 == returnCode) {
                    String property;
                    boolean digest = false;
                    Properties headerProperties = client.getHTTPHeaderProperties();
                    if (headerProperties != null && (property = headerProperties.getProperty("www-authenticate")) != null) {
                        String lc = property.trim().toLowerCase();
                        if (lc.startsWith("basic")) {
                            AttributeEntry[] entries = AttributeHandler.parse(property = property.trim().substring(5).trim(), NEED_LIST);
                            if (entries != null && entries.length > 0) {
                                for (AttributeEntry entry : entries) {
                                    validator.getRealms().put(url, entry.getValue());
                                }
                            }
                        } else if (lc.startsWith("digest")) {
                            digest = true;
                        }
                    }
                    if (!digest) {
                        validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.UNAUTHORIZED);
                        continue;
                    }
                    validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.UNSUPPORTED_DIGEST_AUTH);
                    continue;
                }
                CURL redirectURL = rec.getRedirectURL();
                if (redirectURL != null) {
                    String redirectString = redirectURL.downstreamString();
                    for (String old : startURLs) {
                        if (!old.equals(redirectString)) continue;
                        redirectURL = null;
                        redirectString = null;
                        break;
                    }
                    if (redirectString == null) continue;
                    validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.REDIRECTED);
                    if (!validator.isEnableSuggestion()) continue;
                    newSeeds.add(redirectString);
                    validator.addChanges("/Crawler/StartURLs/URL", redirectString, WCValidator.Type.NEW_START_URL);
                    validator.updated();
                    itr.remove();
                    continue;
                }
                validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.PAGE_NOT_FOUND);
                itr.remove();
            }
            catch (WCException e) {
                validator.addChanges("/Crawler/StartURLs/URL", url, WCValidator.Type.PAGE_NOT_FOUND);
                itr.remove();
                if (!tracer.isLoggable(Level.FINE)) continue;
                tracer.log(Level.FINE, e);
            }
        }
        startURLs.addAll(newSeeds);
    }
}

