/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer;

import com.ibm.es.nuvo.anchortext.AnchorTextException;
import com.ibm.es.nuvo.anchortext.ReferrerAnchorText;
import com.ibm.es.nuvo.anchortext.TargetAnchorText;
import com.ibm.es.nuvo.common.Metadata;
import com.ibm.es.nuvo.configuration.CollectionConfiguration;
import com.ibm.es.nuvo.indexer.SpecialSimilarity;
import com.ibm.es.nuvo.parser.ParserException;
import com.ibm.es.nuvo.parser.ParserHandler;
import com.ibm.es.nuvo.parser.Segment;
import com.ibm.es.nuvo.parser.XMLTagInfo;
import com.ibm.es.nuvo.tokenizer.Anchor;
import com.ibm.es.nuvo.tokenizer.FieldAggregator;
import com.ibm.es.nuvo.tokenizer.IndexableDocument;
import com.ibm.es.nuvo.tokenizer.PostTokenizeAnalyzer;
import com.ibm.es.nuvo.tokenizer.TokenizableDocument;
import com.ibm.es.nuvo.tokenizer.TokenizerException;
import com.ibm.es.nuvo.tokenizer.TokenizerPool;
import java.io.InputStream;
import java.util.List;

/**
 * {@link ParserHandler} implementation that forwards parser events to a
 * {@link FieldAggregator} and then drives tokenization and finalization of a
 * single reusable {@link TokenizableDocument}.
 *
 * <p>The methods are guarded by a {@code started} flag: {@link #tokenizeDocument()},
 * {@link #setAnchorText(TargetAnchorText)} and {@link #finalizeDocument()} do
 * nothing useful until {@link #startDocument(String, Metadata)} has been called,
 * and {@link #resetDocument()} clears the flag again.
 *
 * <p>The instance holds mutable per-document state and performs no
 * synchronization of its own.
 */
public class DocumentWriter
implements ParserHandler {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";

    // Collaborators and settings fixed at construction time.
    private final PostTokenizeAnalyzer postAnalyzer;
    private final TokenizableDocument document;
    private final FieldAggregator aggregator;
    private final String cid;
    private final boolean extractAnchors;
    private final SpecialSimilarity similarity;
    private final TokenizerPool tokenizerPool;

    // Per-document state, rewritten for every document handled by this writer.
    private boolean started;
    private String uri;
    private ReferrerAnchorText referrerAnchorText;

    /**
     * Creates a writer bound to one collection configuration.
     *
     * @param config           collection configuration; supplies the collection id and is
     *                         passed to the aggregator and the tokenizable document
     * @param pool             tokenizer pool handed to the document when it is tokenized
     * @param analyzer         analyzer returned unchanged by {@link #getAnalyzer()}
     * @param doExtractAnchors whether {@link #tokenizeDocument()} collects referrer anchor text
     * @param similarity       similarity passed to the document when it is finalized
     */
    public DocumentWriter(CollectionConfiguration config, TokenizerPool pool, PostTokenizeAnalyzer analyzer, boolean doExtractAnchors, SpecialSimilarity similarity) {
        this.cid = config.getId();
        this.tokenizerPool = pool;
        this.aggregator = new FieldAggregator(config);
        this.document = new TokenizableDocument(config);
        this.extractAnchors = doExtractAnchors;
        this.similarity = similarity;
        this.postAnalyzer = analyzer;
    }

    /**
     * Begins a new document: records its URI and metadata, notifies the
     * aggregator and marks this writer as started.
     *
     * @param u document URI
     * @param p document metadata
     * @throws ParserException if the aggregator rejects the document start
     */
    public void startDocument(String u, Metadata p) throws ParserException {
        // Drop anchor text collected for a previous document so that it can
        // never be attached to this one by finalizeDocument().
        this.referrerAnchorText = null;
        this.document.setCollectionId(this.cid);
        this.document.setMetadata(p);
        this.aggregator.startDocument(u, p);
        this.uri = u;
        this.started = true;
    }

    /**
     * Forwards a parsed segment to the aggregator.
     *
     * @param segment segment produced by the parser
     * @throws ParserException if the aggregator fails to accept the segment
     */
    public void addSegment(Segment segment) throws ParserException {
        this.aggregator.addSegment(segment);
    }

    /**
     * Forwards XML tag information to the aggregator.
     *
     * @param xti tag information produced by the parser
     * @throws ParserException if the aggregator fails to accept the tag information
     */
    public void addXMLTagInfo(XMLTagInfo xti) throws ParserException {
        this.aggregator.addXMLTagInfo(xti);
    }

    /**
     * Intentionally empty: the aggregator is closed later, from
     * {@link #tokenizeDocument()}.
     *
     * @throws ParserException never thrown by this implementation
     */
    public void endDocument() throws ParserException {
    }

    /**
     * Ends aggregation, tokenizes the current document and, when anchor
     * extraction is enabled, collects the referrer anchor text of its anchors.
     *
     * <p>Anchor extraction is best effort: an {@link AnchorTextException} is
     * reported on standard error and does not fail the tokenization.
     *
     * @return {@code false} if no document has been started, {@code true} otherwise
     * @throws TokenizerException if tokenizing the document fails
     */
    public boolean tokenizeDocument() throws TokenizerException {
        if (!this.started) {
            return false;
        }
        this.aggregator.endDocument();
        this.document.tokenizeDocument(this.tokenizerPool, this.aggregator);
        // Start from a clean slate; if extraction fails before a new instance
        // is assigned, no anchor text from an earlier run may remain.
        this.referrerAnchorText = null;
        if (!this.extractAnchors) {
            return true;
        }
        try {
            List<Anchor> anchors = this.aggregator.getAnchors();
            this.referrerAnchorText = new ReferrerAnchorText(this.uri);
            for (Anchor anchor : anchors) {
                // Anchors without a token vector are skipped.
                if (anchor.getTokenVector() == null) {
                    continue;
                }
                // A false return from addReference ends the collection early.
                if (!this.referrerAnchorText.addReference(anchor.target, anchor.getTokenVector())) {
                    break;
                }
            }
        }
        catch (AnchorTextException e) {
            // Best effort: keep whatever was collected so far and continue.
            e.printStackTrace();
        }
        return true;
    }

    /**
     * Passes target anchor text to the current document; ignored when no
     * document has been started.
     *
     * @param targetAnchorText anchor text to hand to the document
     */
    public void setAnchorText(TargetAnchorText targetAnchorText) {
        if (this.started) {
            this.document.setAnchorText(targetAnchorText);
        }
    }

    /**
     * Finalizes the current document and attaches the referrer anchor text
     * collected by {@link #tokenizeDocument()}, which may be {@code null}.
     *
     * @return the indexable document, or {@code null} if no document has been
     *         started or the underlying document yields {@code null}
     */
    public IndexableDocument finalizeDocument() {
        if (!this.started) {
            return null;
        }
        IndexableDocument indexableDocument = this.document.finalizeDocument(this.uri, this.similarity, this.aggregator);
        if (indexableDocument == null) {
            return null;
        }
        indexableDocument.setReferrerAnchorText(this.referrerAnchorText);
        return indexableDocument;
    }

    /**
     * Marks this writer as not started and releases the anchor text collected
     * for the document that was being processed.
     */
    public void resetDocument() {
        this.started = false;
        this.referrerAnchorText = null;
    }

    /**
     * Hands the document input stream to the aggregator.
     *
     * @param is input stream of the document
     */
    public void setDocumentInputStream(InputStream is) {
        this.aggregator.setDocumentInputStream(is);
    }

    /**
     * @return the analyzer supplied to the constructor
     */
    public PostTokenizeAnalyzer getAnalyzer() {
        return this.postAnalyzer;
    }

    /**
     * Intentionally empty: this writer releases nothing on disposal.
     */
    public void dispose() {
    }
}

