/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.parser.transform.oi;

import com.ibm.es.nuvo.common.Metadata;
import com.ibm.es.nuvo.normalizer.MimetypeNormalizer;
import com.ibm.es.nuvo.parser.MalformedFormatException;
import com.ibm.es.nuvo.parser.ParserException;
import com.ibm.es.nuvo.parser.ParserHandler;
import com.ibm.es.nuvo.parser.ParserStage;
import com.ibm.es.nuvo.parser.Segment;
import com.ibm.es.nuvo.util.decimal.Decimal;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;

/**
 * Parser stage that rewrites two kinds of named segments before forwarding
 * them to the superclass:
 * <ul>
 *   <li>{@code contenttype} segments are replaced by a {@code doctype} segment
 *       carrying the mimetype returned by {@link MimetypeNormalizer}; the same
 *       mimetype is also stored in the document {@link Metadata} when present.
 *       Only the first {@code contenttype} segment of a document is processed;
 *       later ones are dropped.</li>
 *   <li>{@code docdate} segments whose text starts with {@code "D:"} (the PDF
 *       date syntax) are replaced by a {@code docdate} segment that additionally
 *       carries the parsed date as a {@link Decimal}.</li>
 * </ul>
 * Every other segment is forwarded untouched. Field names are matched
 * case-insensitively.
 *
 * <p>Instances keep per-document state and a shared scratch buffer and date
 * format, so a single instance must not be used from several threads at once.
 */
public class OutsideInCleanser extends ParserStage {
    private static final String COPYRIGHT = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    private static final String UNKNOWN_MIMETYPE = "application/octet-stream";
    private static final String CONTENT_TYPE = "contenttype";
    private static final String DOC_DATE = "docdate";
    private static final String PDF_DATE_PREFIX = "D:";
    /** Length of {@code "D:" + "yyyyMMddHHmmss"}; a time-zone designator, if any, starts here. */
    private static final int PDF_ZONE_INDEX = 16;

    /** Kept per instance (not static) because {@link SimpleDateFormat} is not thread-safe. */
    private final DateFormat pdfDateFormat = new SimpleDateFormat("'D:'yyyyMMddHHmmssZ");
    private String uri;
    private Metadata metadata;
    /** True once a {@code contenttype} segment has been converted for the current document. */
    private boolean seenDocType;
    /** Scratch buffer reused to read the text of a segment. */
    private final StringBuilder buffer = new StringBuilder(32);

    /**
     * Creates the stage.
     *
     * @param handler passed to the {@link ParserStage} constructor
     */
    protected OutsideInCleanser(ParserHandler handler) {
        super(handler);
    }

    /**
     * Records the document URL and metadata, resets the per-document state and
     * delegates to the superclass.
     *
     * @param url  URL of the document; used when reporting a malformed document
     * @param meta document metadata, may be {@code null}
     * @throws ParserException if the superclass fails
     */
    public void startDocument(String url, Metadata meta) throws ParserException {
        this.metadata = meta;
        this.uri = url;
        this.seenDocType = false;
        super.startDocument(url, meta);
    }

    /**
     * Rewrites {@code contenttype} and {@code docdate} segments as described on
     * the class and forwards the (possibly replaced) segment to the superclass.
     *
     * @param segment the incoming segment
     * @throws MalformedFormatException if a {@code contenttype} segment reads
     *         {@code "Unknown (ANSI 8)"}
     * @throws ParserException if the superclass fails
     */
    public void addSegment(Segment segment) throws ParserException {
        String field = segment.getName();
        if (field != null) {
            field = field.toLowerCase();
            if (field.equals(CONTENT_TYPE)) {
                if (this.seenDocType) {
                    // Only the first content type of a document is used; drop the rest.
                    return;
                }
                segment = this.toDocTypeSegment(segment);
            } else if (field.equals(DOC_DATE)) {
                segment = this.toDocDateSegment(segment);
            }
        }
        super.addSegment(segment);
    }

    /**
     * Delegates to the superclass.
     *
     * @throws ParserException if the superclass fails
     */
    public void endDocument() throws ParserException {
        super.endDocument();
    }

    /** Builds the {@code doctype} segment (normalized mimetype) that replaces a {@code contenttype} segment. */
    private Segment toDocTypeSegment(Segment segment) throws ParserException {
        int flags = segment.getFlags();
        String docType = this.textOf(segment);
        if (docType.equals("Unknown (ANSI 8)")) {
            throw new MalformedFormatException(this.uri);
        }
        String mimetype = MimetypeNormalizer.normalize(docType);
        if (mimetype == null || mimetype.length() == 0) {
            mimetype = UNKNOWN_MIMETYPE;
        }
        if (mimetype.equals(UNKNOWN_MIMETYPE) || mimetype.equals("text/html")) {
            // For these two mimetypes the original content-type text is also
            // forwarded under the "_stype" name.
            // NOTE(review): presumably so a later stage can refine the type - confirm.
            super.addSegment(Segment.newSegment("_stype", segment, 0, segment.length(), segment.getFlags()));
        }
        if (this.metadata != null) {
            this.metadata.set("doctype", mimetype);
        }
        this.seenDocType = true;
        return Segment.newSegment("doctype", mimetype, flags);
    }

    /**
     * Returns a {@code docdate} segment carrying the parsed value when the text
     * is a parseable PDF date; otherwise returns {@code segment} unchanged.
     */
    private Segment toDocDateSegment(Segment segment) throws ParserException {
        int flags = segment.getFlags();
        String datestr = this.textOf(segment);
        if (!datestr.startsWith(PDF_DATE_PREFIX)) {
            return segment;
        }
        try {
            Decimal value = new Decimal(this.pdfDateFormat.parse(normalizePdfDate(datestr)));
            // NOTE(review): 0x40000 presumably marks the segment as carrying a
            // parsed date value - confirm against the Segment flag definitions.
            return Segment.newSegment(DOC_DATE, datestr, value, flags | 0x40000);
        }
        catch (ParseException ignored) {
            // Best effort: a date that cannot be parsed is forwarded as plain text.
            return segment;
        }
    }

    /**
     * Converts a PDF date string into the form accepted by {@link #pdfDateFormat}.
     * Apostrophes (as in {@code +05'30'}) are removed, and the UTC designator
     * {@code Z}, which the RFC 822 pattern letter does not accept, is rewritten
     * as {@code +0000}.
     */
    private static String normalizePdfDate(String pdfDate) {
        String stripped = pdfDate.replace("'", "");
        if (stripped.length() > PDF_ZONE_INDEX && stripped.charAt(PDF_ZONE_INDEX) == 'Z') {
            return stripped.substring(0, PDF_ZONE_INDEX) + "+0000";
        }
        return stripped;
    }

    /** Reads the text of {@code segment} through the shared scratch buffer. */
    private String textOf(Segment segment) {
        this.buffer.setLength(0);
        segment.appendTo(this.buffer);
        return this.buffer.toString();
    }
}

