/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.crawler.web.parser.html.stream;

import com.ibm.es.nuvo.common.ExtendedException;
import com.ibm.es.nuvo.common.Message;
import com.ibm.es.nuvo.crawler.util.hash.HashUtil;
import com.ibm.es.nuvo.crawler.web.parser.doc.Field;
import com.ibm.es.nuvo.crawler.web.parser.doc.HTMLParsedDocument;
import com.ibm.es.nuvo.crawler.web.parser.doc.Link;
import com.ibm.es.nuvo.crawler.web.parser.html.HTMLEntities;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.FrameHandler;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.LinkHandler;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.MetaHandler;
import com.ibm.es.nuvo.crawler.web.parser.html.stream.RSSHandler;
import com.ibm.es.nuvo.logging.ExtendedLogger;
import com.ibm.es.nuvo.logging.Loggers;
import java.io.ByteArrayInputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Properties;
import java.util.logging.Level;

public class HtmlStreamParser {
    // Embedded copyright notice; not referenced by any code visible in this class.
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    // Trailing marker of a CDATA section as it appears just before the closing '>'.
    // The decompiled parse() uses the inlined literal "]]" rather than this name.
    private static final String CDATA_END = "]]";
    // parse() only sets its "found <html> header" flag when the tag closes before
    // character offset 500; presumably that inlined literal is this constant.
    private static final int HTML_TAG_MAX_OFFSET = 500;
    // Leading marker of a CDATA section. It is lower case because parse() matches it
    // against the lower-cased tag text.
    private static final String CDATA_START = "![cdata[";
    // Logger used for the WARNING emitted when parsing fails with an IOException.
    private static ExtendedLogger logger = Loggers.logger;
    // Trace logger (FINE/FINER/FINEST) named "NuvoTracer." + this class name.
    private static final ExtendedLogger tracer = ExtendedLogger.getLogger("NuvoTracer." + HtmlStreamParser.class.getName());
    // Size of the char read buffer; parse() also uses 1024 as the cap on the
    // comment buffer (both appear as inlined literals in the decompiled code).
    private static final int BUFFER_SIZE = 1024;
    // Sentinel rssVersion value that parse() assigns when the root element is "feed" (Atom).
    private static final double ATOM_VERSION = 100.0;

    /**
     * Parses an in-memory document by wrapping the bytes in a stream and
     * delegating to {@link #parse(InputStream, String, String)}.
     *
     * @param content         raw bytes of the document
     * @param url             address of the document, forwarded unchanged
     * @param initialEncoding charset name used to decode the bytes, forwarded unchanged
     * @return whatever the stream-based overload returns
     * @throws CharConversionException propagated from the stream-based overload
     */
    public static HTMLParsedDocument parse(byte[] content, String url, String initialEncoding) throws CharConversionException {
        InputStream byteStream = new ByteArrayInputStream(content);
        return HtmlStreamParser.parse(byteStream, url, initialEncoding);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * Unable to fully structure code
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    /**
     * Scans a character stream once, character by character, and collects the
     * title, links, frames, meta fields, base address and a content hash into an
     * {@link HTMLParsedDocument}. Root elements "rss", "rdf:rdf" and "feed" are
     * recognised and switch the scanner into feed mode.
     *
     * <p>NOTE: this body is raw decompiler output. It contains {@code ** GOTO}
     * pseudo-statements, jump labels and undeclared locals, so it does not compile
     * as written; the comments below describe the control flow the labels encode.
     *
     * <p>NOTE(review): the reader created here is never closed in this method;
     * confirm that the caller owns and closes {@code is}.
     *
     * @param is              stream to read; decoded with {@code initialEncoding}, or
     *                        with the platform default charset if that name is unsupported
     * @param url             used in log messages and as the document base when no
     *                        base was parsed
     * @param initialEncoding charset name for decoding; also the fallback document
     *                        encoding when no "charset" property or field is found
     * @return the populated document, or an empty {@code new HTMLParsedDocument()}
     *         when an {@link IOException} other than a
     *         {@link CharConversionException} occurs
     * @throws CharConversionException rethrown (after an optional FINE trace) when
     *         the reader reports it
     */
    public static HTMLParsedDocument parse(InputStream is, String url, String initialEncoding) throws CharConversionException {
        block48: {
            reader = null;
            try {
                reader = new InputStreamReader(is, initialEncoding);
            }
            catch (UnsupportedEncodingException e) {
                // Unknown charset name: fall back to the platform default and trace it.
                reader = new InputStreamReader(is);
                if (!HtmlStreamParser.tracer.isLoggable(Level.FINE)) break block48;
                HtmlStreamParser.tracer.log(Level.FINE, MessageFormat.format("Unknown encoding {1} for {0}.", new Object[]{url, initialEncoding}), e);
            }
        }
        // Read buffer (same value as BUFFER_SIZE).
        cs = new char[1024];
        // Accumulates the text of the current tag or, when needBody is set, element body.
        sb = new StringBuilder();
        // True between a '<' and the '>' that closes it.
        inTag = false;
        // Negative means "no feed root element seen"; set to 0.9, 1.0 or 100.0 below.
        rssVersion = -1.0;
        // Classification of the tag currently being scanned, and of the one before it.
        tagType = TagType.NONE;
        lastType = TagType.NONE;
        properties = new Properties();
        // Default document date is "now"; it is read back as a Date when the document is built.
        properties.put("dc.date", new Date());
        links = new ArrayList<Link>();
        frames = new ArrayList<String>();
        fields = new ArrayList<Field>();
        // True while the text following a tag must be captured (title text, RSS link text).
        needBody = false;
        // Set by an "image" token in feed mode; suppresses RSS link capture while true.
        rssImage = false;
        isAtom = false;
        title = null;
        part = Part.NONE;
        // Number of characters consumed so far across all buffer reads.
        count = 0;
        // Set when an "html" tag closes within the first 500 characters.
        foundHeader = false;
        base = null;
        hash = null;
        try {
            while ((len = reader.read(cs)) > 0) {
                block23: for (i = 0; i < len; ++i, ++count) {
                    string = null;
                    // Every character outside a comment contributes to the running hash.
                    if (tagType != TagType.COMMENT) {
                        hash = HashUtil.hashCode64(cs[i], 0L, null, hash);
                    }
                    switch (cs[i]) {
                        case '<': {
                            // Everything buffered since the last reset.
                            string = sb.toString().trim();
                            if (HtmlStreamParser.tracer.isLoggable(Level.FINEST) && sb != null && sb.length() > 0) {
                                HtmlStreamParser.tracer.finest("Chars " + string);
                            }
                            // A '<' met while the buffered tag text starts with "!--" means we are inside a comment.
                            if (string.startsWith("!--")) {
                                tagType = TagType.COMMENT;
                            }
                            inTag = true;
                            // Synthetic switch map: case 1 = TITLE, case 2 = RSS_LINK.
                            switch (1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[tagType.ordinal()]) {
                                case 1: {
                                    // Body text of the title element has ended.
                                    title = HTMLEntities.decodeHTMLEncoding(string);
                                    break;
                                }
                                case 2: {
                                    // Body text of an RSS link element: record it unless empty or inside an image.
                                    if (rssImage || string.length() <= 0) break;
                                    links.add(new Link(string, true));
                                    break;
                                }
                            }
                            needBody = false;
                            sb = new StringBuilder();
                            // Inside a comment the type is kept; otherwise remember it and start undecided.
                            if (tagType == TagType.COMMENT) continue block23;
                            lastType = tagType;
                            tagType = TagType.NONE;
                            continue block23;
                        }
                        case '>': {
                            string = sb.toString().trim();
                            lc = string.toLowerCase();
                            // In a comment, '>' only matters if it terminates the comment (see lbl101).
                            if (tagType == TagType.COMMENT) ** GOTO lbl101
                            inTag = false;
                            var28_31 = lc;
                            if (lc.startsWith("![cdata[") && lc.endsWith("]]")) {
                                // CDATA section: keep the payload (original case) in the buffer and
                                // restore the type of the enclosing element.
                                sb = new StringBuilder();
                                sb.append(string.substring("![cdata[".length(), string.length() - "]]".length()));
                                tagType = lastType;
                                if (!HtmlStreamParser.tracer.isLoggable(Level.FINEST)) continue block23;
                                HtmlStreamParser.tracer.finest("Tag(CDATA) " + var28_31);
                                continue block23;
                            }
                            if (HtmlStreamParser.tracer.isLoggable(Level.FINEST)) {
                                HtmlStreamParser.tracer.finest("Tag " + var28_31);
                            }
                            // Track the document section from attribute-less structural tags.
                            if ("html".equals(var28_31)) {
                                part = Part.HTML;
                                // 500 is the same value as HTML_TAG_MAX_OFFSET.
                                if (count < 500) {
                                    foundHeader = true;
                                }
                            } else if ("body".equals(var28_31)) {
                                part = Part.BODY;
                            } else if ("head".equals(var28_31)) {
                                part = Part.HEAD;
                            } else if ("/head".equals(var28_31)) {
                                part = Part.BODY;
                            }
                            // First title tag inside the head: capture the following text as the title.
                            // (tagType cannot be COMMENT on this path; that case jumped to lbl101 above.)
                            if (title == null && var28_31.startsWith("title") && part == Part.HEAD && tagType != TagType.COMMENT) {
                                needBody = true;
                                tagType = TagType.TITLE;
                                sb = new StringBuilder();
                                continue block23;
                            }
                            // In feed mode an attribute-less "link" tag carries its target as body text.
                            if (rssVersion > 0.0 && var28_31.equals("link") && !rssImage) {
                                needBody = true;
                                sb = new StringBuilder();
                                tagType = TagType.RSS_LINK;
                                continue block23;
                            }
                            // Any other closed tag: fall into the default branch to process it.
                            ** GOTO lbl104
lbl101:
                            // 1 sources
                            // Comment path: the comment ends only if the text before '>' ends with "--".

                            if (string.endsWith("--")) {
                                tagType = TagType.NONE;
                                inTag = false;
                            }
                        }
lbl104:
                        // 5 sources
                        // Reached for ordinary characters and, by fall-through/jump, for '>'.

                        default: {
                            // Outside a tag: handle body text or a just-completed tag (lbl171).
                            if (!inTag) ** GOTO lbl171
                            // Inside a tag: only the first whitespace of a still-undecided tag
                            // triggers classification; everything else is just accumulated (lbl165).
                            if (tagType != TagType.NONE || !Character.isWhitespace(cs[i])) ** GOTO lbl165
                            // The text buffered up to the first whitespace is the tag name.
                            token = sb.toString().trim().toLowerCase();
                            if (HtmlStreamParser.tracer.isLoggable(Level.FINEST)) {
                                HtmlStreamParser.tracer.finest("Token " + token);
                            }
                            if (!"link".equals(token)) ** GOTO lbl113
                            tagType = TagType.LINK;
                            ** GOTO lbl164
lbl113:
                            // 1 sources
                            // "a" and "area" are links unless the document is Atom (rssVersion 100.0).

                            if (rssVersion == 100.0 || !"a".equals(token) && !"area".equals(token)) ** GOTO lbl116
                            tagType = TagType.LINK;
                            ** GOTO lbl164
lbl116:
                            // 1 sources
                            // "base" is only honoured inside the head.

                            if (part != Part.HEAD || !"base".equals(token)) ** GOTO lbl119
                            tagType = TagType.BASE;
                            ** GOTO lbl164
lbl119:
                            // 1 sources

                            if (!"meta".equals(token)) ** GOTO lbl122
                            tagType = TagType.META;
                            ** GOTO lbl164
lbl122:
                            // 1 sources

                            if (!"frame".equals(token)) ** GOTO lbl125
                            tagType = TagType.FRAME;
                            ** GOTO lbl164
lbl125:
                            // 1 sources

                            if (!token.startsWith("!--")) ** GOTO lbl128
                            tagType = TagType.COMMENT;
                            ** GOTO lbl164
lbl128:
                            // 1 sources
                            // A "body" tag that carries attributes.

                            if (!"body".equals(token)) ** GOTO lbl131
                            part = Part.BODY;
                            ** GOTO lbl164
lbl131:
                            // 1 sources
                            // NOTE(review): the token tested is "header", not "head"; a "head" tag
                            // with attributes is not matched here. Confirm whether this is intended.

                            if (!"header".equals(token)) ** GOTO lbl134
                            part = Part.HEAD;
                            ** GOTO lbl164
lbl134:
                            // 1 sources
                            // Feed root elements are only recognised before any HTML section was seen.

                            if (part != Part.NONE || !"rss".equals(token)) ** GOTO lbl137
                            rssVersion = 0.9;
                            ** GOTO lbl164
lbl137:
                            // 1 sources
                            // Matches when the first seven characters of the token are "rdf:rdf".

                            if (part != Part.NONE || !"rdf:rdf".regionMatches(0, token, 0, 7)) ** GOTO lbl140
                            rssVersion = 1.0;
                            ** GOTO lbl164
lbl140:
                            // 1 sources
                            // Atom feed: 100.0 is the same value as ATOM_VERSION.

                            if (part != Part.NONE || !"feed".equals(token)) ** GOTO lbl144
                            rssVersion = 100.0;
                            isAtom = true;
                            ** GOTO lbl164
lbl144:
                            // 1 sources
                            // The remaining names are only meaningful in a non-Atom feed;
                            // otherwise the tag is discarded (lbl163).

                            if (!(rssVersion > 0.0) || isAtom) ** GOTO lbl163
                            if (!"item".equals(token)) ** GOTO lbl148
                            tagType = TagType.RSS_ITEM;
                            ** GOTO lbl164
lbl148:
                            // 1 sources

                            if (!"rdf:li".equals(token)) ** GOTO lbl151
                            tagType = TagType.RSS_RESOURCE;
                            ** GOTO lbl164
lbl151:
                            // 1 sources

                            if (!"channel".equals(token)) ** GOTO lbl154
                            tagType = TagType.RSS_CHANNEL;
                            ** GOTO lbl164
lbl154:
                            // 1 sources
                            // NOTE(review): this test cannot succeed, because a "link" token is
                            // already consumed by the first check of this chain (tagType = LINK).

                            if (!"link".equals(token)) ** GOTO lbl157
                            tagType = TagType.RSS_LINK;
                            ** GOTO lbl164
lbl157:
                            // 1 sources
                            // Track whether we are inside an RSS image element.

                            if ("image".equals(token)) {
                                rssImage = true;
                            } else if ("/image".equals(token)) {
                                rssImage = false;
                            }
                            ** GOTO lbl164
lbl163:
                            // 1 sources
                            // Unrecognised tag name: the rest of the tag is not buffered.

                            tagType = TagType.DISCARD;
lbl164:
                            // 18 sources
                            // Tag name consumed; start buffering the attribute text.

                            sb = new StringBuilder();
lbl165:
                            // 2 sources
                            // Accumulate the character unless the tag is being discarded.

                            if (tagType != TagType.DISCARD) {
                                sb.append(cs[i]);
                            }
                            // Bound the comment buffer: past 1024 chars keep only the tail from
                            // index 1022, which still allows the "--" terminator test to work.
                            if (tagType != TagType.COMMENT || sb.length() <= 1024) continue block23;
                            sb = new StringBuilder(sb.toString().substring(1022));
                            continue block23;
lbl171:
                            // 1 sources
                            // Outside a tag.

                            if (needBody) {
                                // Capturing element body text (title or RSS link).
                                sb.append(cs[i]);
                                continue block23;
                            }
                            if (sb.length() <= 0) continue block23;
                            // A tag has just been closed: hand its buffered attribute text to the
                            // handler for its type. Synthetic switch map: 3 = BASE, 4 = LINK,
                            // 5 = FRAME, 6 = META, 7 = RSS_ITEM, 8 = RSS_RESOURCE, 9 = RSS_CHANNEL.
                            tag = sb.toString().trim();
                            switch (1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[tagType.ordinal()]) {
                                case 3: {
                                    base = LinkHandler.parseBase(tag);
                                    // NOTE(review): no break here, so a BASE tag is also passed to
                                    // LinkHandler.parse below. Confirm the fall-through is intended.
                                }
                                case 4: {
                                    LinkHandler.parse(links, tag, isAtom);
                                    break;
                                }
                                case 5: {
                                    FrameHandler.parse(frames, tag);
                                    break;
                                }
                                case 6: {
                                    // Meta tags are only processed while in the head.
                                    if (part != Part.HEAD) break;
                                    MetaHandler.parse(properties, fields, tag);
                                    break;
                                }
                                case 7: 
                                case 8: 
                                case 9: {
                                    RSSHandler.parse(links, tag);
                                    break;
                                }
                            }
                            sb = new StringBuilder();
                        }
                    }
                }
            }
            if (HtmlStreamParser.tracer.isLoggable(Level.FINER)) {
                HtmlStreamParser.tracer.finer("RSS " + rssVersion);
            }
            // Document encoding: "charset" property, else the caller's encoding; a field
            // named "charset" with at least one value overrides it (the last such field wins).
            encoding = properties.getProperty("charset", initialEncoding);
            for (Field var28_33 : fields) {
                if (!"charset".equalsIgnoreCase(var28_33.getName()) || (values = var28_33.getValues()).length <= 0) continue;
                encoding = values[0];
            }
            // The base address falls back to the document URL when no base was parsed.
            document = new HTMLParsedDocument(encoding, links, frames, fields, properties.getProperty("refresh"), base == null ? url : base.downstreamString(), title, ((Date)properties.get("dc.date")).getTime());
            document.setHash(HashUtil.asLong(hash));
            document.setContentType(properties.getProperty("content-type", "text/html"));
            document.setRssVersion(rssVersion);
            // Counts as HTML only if no feed was detected and at least one HTML artefact was found.
            document.setFoundHtmlTag((foundHeader != false || links.size() > 0 || frames.size() > 0 || fields.size() > 0 || title != null) && rssVersion < 0.0);
            return document;
        }
        catch (CharConversionException e) {
            // Trace at FINE when enabled, then always rethrow to the caller.
            if (HtmlStreamParser.tracer.isLoggable(Level.FINE) == false) throw e;
            HtmlStreamParser.tracer.log(Level.FINE, "The content at " + url + " cannot be parsed.", e);
            throw e;
        }
        catch (IOException e) {
            // Any other read failure: log a warning when enabled and return an empty document.
            if (HtmlStreamParser.logger.isLoggable(Level.WARNING) == false) return new HTMLParsedDocument();
            message = new Message("C4922W.PARSE_HTML_FAIL", new Object[]{url});
            var28_35 = new ExtendedException(message, (Throwable)e);
            HtmlStreamParser.logger.log(Level.WARNING, var28_35);
            return new HTMLParsedDocument();
        }
    }

    /**
     * Compiler-generated helper, shown as decompiled, backing the two
     * {@code switch} statements over {@link TagType} in {@code parse}. The array
     * is indexed by {@code TagType.ordinal()} and holds the case number used in
     * those switches: TITLE=1, RSS_LINK=2, BASE=3, LINK=4, FRAME=5, META=6,
     * RSS_ITEM=7, RSS_RESOURCE=8, RSS_CHANNEL=9. Constants not assigned here keep
     * the array default of 0 and therefore match no case.
     *
     * <p>NOTE: {@code 1} is not a legal Java identifier; this class only exists
     * in this form in decompiler output.
     */
    static class 1 {
        static final /* synthetic */ int[] $SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType;

        static {
            // One slot per TagType constant.
            $SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType = new int[TagType.values().length];
            // Each assignment sits in its own try block whose NoSuchFieldError handler is empty.
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.TITLE.ordinal()] = 1;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.RSS_LINK.ordinal()] = 2;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.BASE.ordinal()] = 3;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.LINK.ordinal()] = 4;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.FRAME.ordinal()] = 5;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.META.ordinal()] = 6;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.RSS_ITEM.ordinal()] = 7;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.RSS_RESOURCE.ordinal()] = 8;
            }
            catch (NoSuchFieldError ex) {
                // empty catch block
            }
            try {
                1.$SwitchMap$com$ibm$es$nuvo$crawler$web$parser$html$stream$HtmlStreamParser$TagType[TagType.RSS_CHANNEL.ordinal()] = 9;
            }
            catch (NoSuchFieldError noSuchFieldError) {
                // empty catch block
            }
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Section of the document the scanner in {@code parse} is currently in.
     */
    private static enum Part {
        /** Initial state; feed root elements (rss, rdf:rdf, feed) are only recognised here. */
        NONE,
        /** Set when an attribute-less "html" tag is closed. */
        HTML,
        /** Set by a "head" tag (or a "header" token); title, base and meta are only honoured here. */
        HEAD,
        /** Set by a "body" tag or by "/head". */
        BODY;

    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Classification that {@code parse} assigns to the tag being scanned. The
     * ordinals index the synthetic switch map, so the order of the constants
     * must stay in step with it.
     */
    private static enum TagType {
        /** No type decided yet for the current tag. */
        NONE,
        /** Unrecognised tag name; the remaining characters of the tag are not buffered. */
        DISCARD,
        /** "link", "a" or "area" tag; its text is passed to LinkHandler.parse. */
        LINK,
        /** "frame" tag; its text is passed to FrameHandler.parse. */
        FRAME,
        /** "meta" tag; its text is passed to MetaHandler.parse when in the head. */
        META,
        /** Inside a comment; characters are excluded from the content hash. */
        COMMENT,
        /** Title element; the following body text becomes the document title. */
        TITLE,
        /** "channel" tag in a non-Atom feed; its text is passed to RSSHandler.parse. */
        RSS_CHANNEL,
        /** Attribute-less "link" element in a feed; its body text is added as a Link. */
        RSS_LINK,
        /** "item" tag in a non-Atom feed; its text is passed to RSSHandler.parse. */
        RSS_ITEM,
        /** "rdf:li" tag in a non-Atom feed; its text is passed to RSSHandler.parse. */
        RSS_RESOURCE,
        /** Not referenced by the parse code visible in this file. */
        ATOM,
        /** "base" tag inside the head; its text is passed to LinkHandler.parseBase. */
        BASE;

    }
}

