/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.parser.html;

import com.ibm.es.nuvo.common.Metadata;
import com.ibm.es.nuvo.parser.ParserException;
import com.ibm.es.nuvo.parser.ParserHandler;
import com.ibm.es.nuvo.parser.ParserStage;
import com.ibm.es.nuvo.parser.Segment;
import java.util.Locale;

/**
 * Parser stage that normalises segment names before passing segments on to the
 * superclass's {@code addSegment}.
 *
 * <p>For every document it:
 * <ul>
 *   <li>emits a {@code "doctype"} segment carrying the content type given at construction;</li>
 *   <li>lower-cases segment names (locale-independently);</li>
 *   <li>renames the first {@code "head"} segment to {@code "title"} if no title has been seen
 *       yet, and clears the name of a {@code "head"} segment once a title has been seen;</li>
 *   <li>consumes {@code "dc.language"} / {@code "language"} segments, copying their text into
 *       the document metadata under {@code "Language"} when that key is not already present;</li>
 *   <li>drops {@code "contenttype"} segments.</li>
 * </ul>
 *
 * <p>Instances keep per-document state ({@code seenTitle}, {@code metadata}) that is reset by
 * {@link #startDocument(String, Metadata)}.
 */
public class HTMLCleanser extends ParserStage {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";

    /** Lower-cased name of segments that are silently dropped. */
    private static final String CONTENT_TYPE = "contenttype";
    /** Lower-cased name of the segment that may be promoted to the title. */
    private static final String HEAD = "head";
    /** Lower-cased name of the title segment. */
    private static final String TITLE = "title";
    /** Name of the synthetic segment emitted at the start of every document. */
    private static final String DOCTYPE = "doctype";
    /** Metadata key under which the document language is recorded. */
    private static final String LANGUAGE_KEY = "Language";
    /**
     * Flag bits (0x50000) attached to the synthetic doctype segment. Their meaning is defined
     * by {@code Segment} and is not visible from this class.
     */
    private static final int DOCTYPE_FLAGS = 327680;

    /** Whether a title (real, or promoted from "head") has been seen in the current document. */
    private boolean seenTitle;
    /** Metadata of the current document; may be {@code null}. */
    private Metadata metadata;
    /** Content type reported in the doctype segment of every document. */
    private final String contentType;

    /**
     * Creates a cleanser.
     *
     * @param handler     handler passed to the {@code ParserStage} constructor
     * @param contentType value placed in the {@code "doctype"} segment of each document
     */
    public HTMLCleanser(ParserHandler handler, String contentType) {
        super(handler);
        this.contentType = contentType;
    }

    /**
     * Resets per-document state, delegates to the superclass and then emits the
     * {@code "doctype"} segment.
     *
     * @param url  document URL, passed through unchanged
     * @param meta document metadata; retained so a language segment can be recorded in it
     * @throws ParserException if the superclass calls fail
     */
    public void startDocument(String url, Metadata meta) throws ParserException {
        this.seenTitle = false;
        this.metadata = meta;
        super.startDocument(url, meta);
        // Bypass this class's own addSegment: the doctype segment needs no cleansing.
        super.addSegment(Segment.newSegment(DOCTYPE, this.contentType, DOCTYPE_FLAGS));
    }

    /**
     * Normalises the segment's name and passes it on, or consumes it (see class description).
     * Segments without a name are passed on untouched.
     *
     * @param segment segment to cleanse; must not be {@code null}
     * @throws ParserException if the superclass call fails
     */
    public void addSegment(Segment segment) throws ParserException {
        String field = segment.getName();
        if (field == null) {
            super.addSegment(segment);
            return;
        }

        int flags = segment.getFlags();
        // Locale.ROOT keeps the comparison stable on JVMs whose default locale has special
        // casing rules (e.g. Turkish, where "TITLE" would otherwise become "t\u0131tle").
        field = field.toLowerCase(Locale.ROOT);

        if (field.equals(HEAD)) {
            if (this.seenTitle) {
                // A title already exists: the segment is still passed on, but unnamed.
                field = null;
            } else {
                field = TITLE;
                this.seenTitle = true;
            }
        } else if (field.equals("dc.language") || field.equals("language")) {
            recordLanguage(segment);
            return;
        } else if (field.equals(CONTENT_TYPE)) {
            return;
        } else if (field.equals(TITLE)) {
            this.seenTitle = true;
        }

        segment.setName(field);
        // Re-apply the flags captured before renaming — presumably guards against setName
        // altering them; confirm against Segment.
        segment.setFlags(flags);
        super.addSegment(segment);
    }

    /**
     * Stores the text of a language segment in the document metadata, unless there is no
     * metadata or a language has already been recorded.
     */
    private void recordLanguage(Segment segment) throws ParserException {
        if (this.metadata != null && !this.metadata.contains(LANGUAGE_KEY)) {
            StringBuffer buf = new StringBuffer();
            segment.appendTo(buf);
            this.metadata.set(LANGUAGE_KEY, buf.toString());
        }
    }
}

