/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer;

import com.ibm.es.nuvo.anchortext.TargetAnchorText;
import com.ibm.es.nuvo.common.ACL;
import com.ibm.es.nuvo.common.Message;
import com.ibm.es.nuvo.common.Metadata;
import com.ibm.es.nuvo.configuration.CollectionConfiguration;
import com.ibm.es.nuvo.configuration.IndexDescriptor;
import com.ibm.es.nuvo.crawler.util.hash.HashUtil;
import com.ibm.es.nuvo.indexer.SpecialSimilarity;
import com.ibm.es.nuvo.normalizer.LanguageNormalizer;
import com.ibm.es.nuvo.parser.Segment;
import com.ibm.es.nuvo.tokenizer.Anchor;
import com.ibm.es.nuvo.tokenizer.BaseTagToken;
import com.ibm.es.nuvo.tokenizer.FieldAggregator;
import com.ibm.es.nuvo.tokenizer.IndexableDocument;
import com.ibm.es.nuvo.tokenizer.ResultHandler;
import com.ibm.es.nuvo.tokenizer.SegmentsInputStream;
import com.ibm.es.nuvo.tokenizer.TToken;
import com.ibm.es.nuvo.tokenizer.TokenVector;
import com.ibm.es.nuvo.tokenizer.Tokenizer;
import com.ibm.es.nuvo.tokenizer.TokenizerException;
import com.ibm.es.nuvo.tokenizer.TokenizerPool;
import com.ibm.es.nuvo.tokenizer.TokenizerProcessInput;
import com.ibm.es.nuvo.tokenizer.XMLToken;
import com.ibm.es.nuvo.util.URLUtils;
import com.ibm.es.nuvo.util.Vint8;
import com.ibm.es.nuvo.util.decimal.Decimal;
import com.ibm.supa.config.AnalysisScopeConfig;
import com.ibm.supa.config.ConfigurationLoader;
import com.ibm.supa.tokenizers.DelegateTokenizer;
import com.ibm.supa.tokenizers.TokenizationParams;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.Deflater;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Per-document working state used to turn the segments collected in a
 * {@link FieldAggregator} into a Lucene {@link Document} plus a map of
 * field name to {@link TokenVector}.
 *
 * <p>Call sequence as far as this class shows it: {@link #setCollectionId},
 * {@link #setMetadata}, {@link #tokenizeDocument}, optionally
 * {@link #setAnchorText}, then {@link #finalizeDocument}. Each call to
 * {@code tokenizeDocument} resets the per-document state, so one instance can
 * be reused for many documents.
 *
 * <p>NOTE(review): nothing in this class synchronizes access to its mutable
 * state; presumably an instance is confined to one thread at a time - confirm
 * against the callers.
 */
class TokenizableDocument {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";

    /** Written between entries of a multi-valued field (ACL tokens, facet entries); code point U+F748. */
    private static final String VALUE_SEPARATOR = "\uf748";
    /** Written between the components of one facet entry (e.g. "url", host, path parts); code point U+F749. */
    private static final char LEVEL_SEPARATOR = '\uf749';

    // Bit masks passed to Segment.test(int). The names describe how this class
    // reacts to each flag; NOTE(review): presumably these mirror constants
    // declared on Segment that the compiler inlined - confirm.
    /** Segment tokens are added to the plain-text vector. */
    private static final int SEGMENT_FLAG_PLAIN = 1;
    /** Segment tokens are added to a vector named after the segment. */
    private static final int SEGMENT_FLAG_FIELD = 2;
    /** Lemma tokens are suppressed in the segment's own field vector. */
    private static final int SEGMENT_FLAG_NO_LEMMATIZE = 4;
    /** Named plain-text segment is treated as "in summary". */
    private static final int SEGMENT_FLAG_IN_SUMMARY = 2048;

    /** Scratch buffer reused whenever a segment's text has to be materialised. */
    private final StringBuilder fieldText;
    /** Token vector registered under "_plain"; cleared and re-registered by {@link #reset()}. */
    private final TokenVector plainVector;
    /** Number of ORIGINAL/BOTH/NGRAM tokens added to the plain vector while "in summary". */
    private int curNumberOfTextTokens = 0;
    /** Field name to token vector, in insertion order. */
    private final Map<String, TokenVector> fieldMap;
    private String cid;
    private Metadata metadata;
    private String knownLanguage = null;
    private String defaultLanguage = null;
    private int linkCount;
    /** Document under construction; null until {@link #tokenizeDocument} has been called. */
    private Document doc;
    /** Passed to {@code IndexableDocument.storeTokenVectors}; ended in {@link #finalize()}. */
    private final Deflater deflater;
    private final boolean isStoreTokenVector;
    /** Reusable stream over segments, used only for the document hash. */
    private final SegmentsInputStream sis;
    private final boolean facetsEnabled;
    private final boolean indexSentenceAnnotations;
    StringBuilder facetValues = new StringBuilder();

    /**
     * Reads the settings this class needs from the collection configuration.
     *
     * @param config collection configuration; supplies the "store token
     *               vectors" switch, whether the Facet index is enabled, and
     *               the Text index property {@code IndexSentenceAnnotations}
     */
    TokenizableDocument(CollectionConfiguration config) {
        this.fieldText = new StringBuilder(256);
        this.fieldMap = new LinkedHashMap<String, TokenVector>();
        this.plainVector = new TokenVector();
        this.deflater = new Deflater(Deflater.DEFAULT_COMPRESSION);
        this.isStoreTokenVector = config.getParserServiceConfig().isStoreTokenVectorsEnabled();
        this.sis = new SegmentsInputStream();
        IndexDescriptor facetDescriptor = config.getIndexDescriptorFor(IndexDescriptor.IndexType.Facet);
        this.facetsEnabled = facetDescriptor != null && facetDescriptor.isEnabled();
        IndexDescriptor textDescriptor = config.getIndexDescriptorFor(IndexDescriptor.IndexType.Text);
        // A missing descriptor or a missing property both mean "disabled".
        this.indexSentenceAnnotations = textDescriptor != null
                && "true".equalsIgnoreCase(textDescriptor.getProperty("IndexSentenceAnnotations"));
    }

    /**
     * Releases the native resources held by the deflater.
     *
     * @throws Throwable whatever {@code super.finalize()} or the deflater throws
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            if (this.deflater != null) {
                this.deflater.end();
            }
        }
        finally {
            // Always chain to the superclass, even if ending the deflater failed.
            super.finalize();
        }
    }

    /**
     * Sets the collection id used to acquire a tokenizer and to look up the
     * analysis configuration.
     *
     * @param cid collection id
     */
    void setCollectionId(String cid) {
        this.cid = cid;
    }

    /**
     * Stores the document metadata and derives the default and known language
     * from its "Language" and "KnownLanguage" entries.
     *
     * @param metadata document metadata; {@code null} clears both languages
     */
    void setMetadata(Metadata metadata) {
        this.metadata = metadata;
        this.defaultLanguage = null;
        this.knownLanguage = null;
        if (metadata != null) {
            this.defaultLanguage = metadata.get("Language");
            this.knownLanguage = metadata.get("KnownLanguage");
        }
    }

    /**
     * Resets the per-document state, starts a new {@link Document} and runs a
     * pooled tokenizer over the aggregator's input, collecting the results
     * into this object's token vectors.
     *
     * @param pool pool from which a tokenizer for the current collection id is
     *             acquired; the tokenizer is always handed back
     * @param fa   aggregator providing the tokenizer input, the tokenized
     *             fields and the anchors
     * @throws TokenizerException if the result handler cannot be created or
     *                            tokenization fails
     */
    void tokenizeDocument(TokenizerPool pool, FieldAggregator fa) throws TokenizerException {
        this.reset();
        this.doc = new Document();
        Tokenizer acquiredTokenizer = pool.aquire(this.cid);
        DefaultResultHandler handler = null;
        try {
            // Created inside the try block: the constructor can throw, and the
            // tokenizer acquired above must go back to the pool in that case too.
            handler = new DefaultResultHandler();
            TokenizerProcessInput tpi = fa.getTokenizerProcessInput();
            tpi.setDefaultLanguage(this.defaultLanguage);
            tpi.setKnownLanguage(this.knownLanguage);
            tpi.setIndexSentenceAnnotations(this.indexSentenceAnnotations);
            tpi.setTokenizationParams(handler.tokParams);
            handler.setStartOfAttributeValues(tpi.getStartOfAttributeValues());
            handler.setFields(fa.getTokenizedFields());
            handler.setAnchors(fa.getAnchors());
            acquiredTokenizer.process(handler, tpi);
        }
        finally {
            try {
                pool.release(acquiredTokenizer);
            }
            finally {
                // Runs even if returning the tokenizer to the pool failed.
                if (handler != null && handler.tokenizer != null) {
                    handler.tokenizer.releaseResources();
                }
            }
        }
    }

    /**
     * Attaches anchor text and the referrer count to the current document.
     * Does nothing when no document has been started.
     *
     * @param targetAnchorText anchor text source; its token vector, if not
     *                         null, is registered under "_anchor"
     */
    void setAnchorText(TargetAnchorText targetAnchorText) {
        if (this.doc != null) {
            TokenVector anchorText = targetAnchorText.getAnchorText();
            if (anchorText != null) {
                this.fieldMap.put("_anchor", anchorText);
            }
            this.linkCount = targetAnchorText.getReferrerCount();
        }
    }

    /**
     * Adds the hash, predefined, untokenized, parametric, stored and
     * (if enabled) facet fields to the current document and wraps the result
     * in an {@link IndexableDocument}.
     *
     * @param uri        document URI, used for URL depth, site and facet values
     * @param similarity used to compute the plain-text boost from the number
     *                   of text tokens; a boost of 1.0 is used when null
     * @param fa         aggregator providing the field segments
     * @return the indexable document, or {@code null} when no document has
     *         been started or an {@link IOException} occurred
     */
    IndexableDocument finalizeDocument(String uri, SpecialSimilarity similarity, FieldAggregator fa) {
        if (this.doc == null) {
            return null;
        }
        try {
            this.calculateDocumentHash(fa);
            this.addPredefinedFields(uri);
            this.addUntokenizedFields(fa.getUntokenizedFields());
            this.addParametricFields(fa.getParametricFields());
            // Must run before the IndexableDocument sees the map.
            this.clearEmptyTokenVectors();
            this.addFields(fa.getStoredFields(), Field.Store.YES, Field.Index.NO);
            this.addFields(fa.getUntokenizedStoredFields(), Field.Store.YES, Field.Index.UN_TOKENIZED);
            if (this.facetsEnabled) {
                this.addFacetField(uri, fa.getFacetFields());
            }
            float plainTextBoost = similarity == null ? 1.0f : similarity.sweetSpotLengthNorm(this.curNumberOfTextTokens);
            IndexableDocument indexableDoc = new IndexableDocument(this.doc, this.fieldMap, this.linkCount);
            indexableDoc.addPreTokenizedFields(plainTextBoost);
            if (this.isStoreTokenVector) {
                indexableDoc.storeTokenVectors(this.deflater);
            }
            return indexableDoc;
        }
        catch (IOException e) {
            // Existing contract: report on stderr and signal failure with null.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Hashes the content of all field segments and stores the result in the
     * "_dochash" field. If reading the segments fails the hash is stored as 0.
     */
    private void calculateDocumentHash(FieldAggregator fa) {
        long dochash = 0L;
        try {
            this.sis.reset();
            this.sis.addSegments(fa.getTokenizedFields());
            this.sis.addSegments(fa.getStoredFields());
            this.sis.addSegments(fa.getUntokenizedFields());
            this.sis.addSegments(fa.getUntokenizedStoredFields());
            this.sis.addSegments(fa.getParametricFields());
            dochash = HashUtil.hashCode64((InputStream)this.sis, 0L);
        }
        catch (IOException ignored) {
            // Best effort: the document is still indexed, with a hash of 0.
        }
        Field field = new Field("_dochash", String.valueOf(dochash), Field.Store.YES, Field.Index.TOKENIZED);
        field.setOmitNorms(true);
        this.doc.add((Fieldable)field);
    }

    /**
     * Registers one single-token vector per segment, holding the segment's
     * trimmed text as an ORIGINAL token. A later segment with the same name
     * replaces the earlier vector.
     */
    private void addUntokenizedFields(List<Segment> fields) {
        for (Segment segment : fields) {
            this.fieldText.setLength(0);
            segment.appendTo(this.fieldText);
            // NOTE(review): the meaning of header value 3 is not visible here.
            TokenVector tv = new TokenVector(new TokenVector.Header(3));
            tv.addToken(TToken.newToken(this.fieldText.toString().trim(), TToken.Type.ORIGINAL));
            this.fieldMap.put(segment.getName(), tv);
        }
    }

    /**
     * Adds the fields derived from the URI and the metadata: "_links",
     * "_urldepth", "site", "securityuri", "_sec", "_sec_&lt;domain&gt;",
     * "_crwsp" and "_binary_key".
     */
    private void addPredefinedFields(String uri) {
        Field linkCountField = new Field("_links", Integer.toString(this.linkCount), Field.Store.YES, Field.Index.TOKENIZED);
        linkCountField.setBoost((float)this.linkCount);
        this.doc.add((Fieldable)linkCountField);

        int urlDepth = URLUtils.computeURLDepth(uri);
        Field urlDepthField = new Field("_urldepth", Integer.toString(urlDepth), Field.Store.YES, Field.Index.TOKENIZED);
        urlDepthField.setBoost((float)urlDepth);
        this.doc.add((Fieldable)urlDepthField);

        String site = URLUtils.getHostname(uri);
        if (site != null) {
            Field siteField = new Field("site", site, Field.Store.YES, Field.Index.TOKENIZED);
            siteField.setOmitNorms(true);
            this.doc.add((Fieldable)siteField);
        }

        if (this.metadata == null) {
            return;
        }

        String securityUri = this.metadata.get("securityuri");
        if (securityUri != null) {
            Field securityUriField = new Field("securityuri", securityUri, Field.Store.YES, Field.Index.NO);
            this.doc.add((Fieldable)securityUriField);
        }

        ACL acl = this.metadata.getACL();
        if (acl != null) {
            String domain = acl.getDomain();
            if (domain != null) {
                this.doc.add((Fieldable)new Field("_sec", "on", Field.Store.YES, Field.Index.NO_NORMS));
                List<String> secTokens = acl.getACLTokens();
                if (secTokens != null && secTokens.size() > 0) {
                    // All ACL tokens go into one field value, joined by the separator.
                    StringBuilder builder = new StringBuilder();
                    for (String token : secTokens) {
                        if (builder.length() > 0) {
                            builder.append(VALUE_SEPARATOR);
                        }
                        builder.append(token);
                    }
                    Field secField = new Field("_sec_" + domain, builder.toString(), Field.Store.YES, Field.Index.TOKENIZED);
                    secField.setOmitNorms(true);
                    this.doc.add((Fieldable)secField);
                }
            }
        }

        String crawlspace = this.metadata.get("CrawlSpaceID");
        if (crawlspace != null) {
            Field field = new Field("_crwsp", crawlspace, Field.Store.YES, Field.Index.NO_NORMS);
            this.doc.add((Fieldable)field);
        }

        String binkey = this.metadata.get("_binary_key");
        if (binkey != null) {
            Field field = new Field("_binary_key", binkey, Field.Store.YES, Field.Index.TOKENIZED);
            field.setOmitNorms(true);
            this.doc.add((Fieldable)field);
        }
    }

    /** Removes every entry of the field map whose vector is null or empty. */
    private void clearEmptyTokenVectors() {
        // Keys are collected first so the map is not modified while iterating.
        List<String> emptyKeys = null;
        for (Map.Entry<String, TokenVector> entry : this.fieldMap.entrySet()) {
            TokenVector tokens = entry.getValue();
            if (tokens != null && !tokens.isEmpty()) {
                continue;
            }
            if (emptyKeys == null) {
                emptyKeys = new ArrayList<String>();
            }
            emptyKeys.add(entry.getKey());
        }
        if (emptyKeys != null) {
            for (String emptyKey : emptyKeys) {
                this.fieldMap.remove(emptyKey);
            }
        }
    }

    /**
     * Adds each segment's text to the document as a Lucene field with the
     * given store and index options.
     */
    private void addFields(List<Segment> fields, Field.Store store, Field.Index index) {
        for (Segment segment : fields) {
            this.fieldText.setLength(0);
            segment.appendTo(this.fieldText);
            this.doc.add((Fieldable)new Field(segment.getName(), this.fieldText.toString(), store, index));
        }
    }

    /**
     * Adds one empty-text GENERIC token per segment to the vector named
     * {@code "_#" + segment name}, carrying the segment's numeric value as
     * payload. Segments sharing a name share one vector.
     */
    private void addParametricFields(List<Segment> fields) {
        for (Segment segment : fields) {
            String name = "_#" + segment.getName();
            TokenVector tv = this.fieldMap.get(name);
            if (tv == null) {
                tv = new TokenVector(new TokenVector.Header(3));
                this.fieldMap.put(name, tv);
            }
            TToken token = TToken.newToken("", TToken.Type.GENERIC);
            // NOTE(review): assumes getNumericValue() is non-null for parametric segments - confirm.
            token.setPayload(segment.getNumericValue().toByteArray());
            tv.addToken(token);
        }
    }

    /**
     * Builds the facet string from the URI, the document's "language" field
     * and the facet segments, and stores it in both "_facets" and
     * "_categories". A URI that cannot be parsed contributes no "url" entry.
     */
    private void addFacetField(String uri, List<Segment> fields) {
        this.facetValues.setLength(0);
        try {
            URI url = new URI(uri);
            if (!url.isOpaque() && url.isAbsolute()) {
                String host = url.getHost();
                String path = url.getPath();
                if (path != null) {
                    // Drop the last path component (everything from the final '/').
                    int lastSlash = path.lastIndexOf('/');
                    if (lastSlash >= 0) {
                        path = path.substring(0, lastSlash);
                    }
                }
                boolean hasHost = host != null && host.length() != 0;
                boolean hasPath = path != null && path.length() != 0;
                if (hasHost || hasPath) {
                    this.facetValues.append("url");
                    if (hasHost) {
                        this.facetValues.append(LEVEL_SEPARATOR);
                        this.facetValues.append(host);
                    }
                    if (hasPath) {
                        this.facetValues.append(path.replace('/', LEVEL_SEPARATOR));
                    }
                    this.facetValues.append(VALUE_SEPARATOR);
                }
            }
        }
        catch (URISyntaxException ignored) {
            // An unparsable URI simply yields no "url" facet entry.
        }
        Field languageField = this.doc.getField("language");
        if (languageField != null) {
            String language = LanguageNormalizer.normalize(languageField.stringValue());
            this.facetValues.append("language");
            this.facetValues.append(LEVEL_SEPARATOR);
            this.facetValues.append(language.replace('-', LEVEL_SEPARATOR));
            this.facetValues.append(VALUE_SEPARATOR);
        }
        for (Segment segment : fields) {
            this.fieldText.setLength(0);
            segment.appendTo(this.fieldText);
            // NOTE(review): '?' may be a character that was lost when this
            // source was recovered; kept exactly as found - confirm.
            this.facetValues.append(this.fieldText.toString().replace('?', LEVEL_SEPARATOR));
            this.facetValues.append(VALUE_SEPARATOR);
        }
        String value = this.facetValues.toString();
        this.doc.add((Fieldable)new Field("_facets", value, Field.Store.YES, Field.Index.TOKENIZED));
        this.doc.add((Fieldable)new Field("_categories", value, Field.Store.YES, Field.Index.TOKENIZED));
    }

    /**
     * Clears the token vectors and counters and re-registers the plain vector
     * under "_plain". The current document reference is left untouched.
     */
    private void reset() {
        this.plainVector.clear();
        this.curNumberOfTextTokens = 0;
        this.fieldMap.clear();
        this.fieldMap.put("_plain", this.plainVector);
        this.linkCount = 0;
    }

    /**
     * Receives the tokenizer's output for one document and routes each token
     * to the plain vector, the current field's vector and/or the current
     * anchor's vector, based on the token's character offsets.
     *
     * <p>Fields and anchors are walked forward only, via running indexes, so
     * the code relies on tokens arriving in non-decreasing offset order.
     */
    private class DefaultResultHandler
    implements ResultHandler.PositionResultHandler {
        private Segment currentField = null;
        private int currentFieldIndex = 0;
        private int currentAnchorIndex = 0;
        private int currentFieldBegin = Integer.MAX_VALUE;
        private int currentFieldEnd = -1;
        private int currentAnchorEnd = -1;
        private boolean currentInSummary = true;
        private boolean currentFieldNoLemmatize;
        private TokenVector activePlainVector = null;
        private TokenVector activeFieldVector = null;
        private TokenVector activeAnchorVector = null;
        private boolean ignoreSentenceBreak = false;
        private int lastNumericTokenBegin = 0;
        private List<Segment> fields;
        private List<Anchor> anchors;
        private int startOfAttributeValues = -1;
        private DelegateTokenizer tokenizer;
        private TokenizationParams tokParams;
        /** Sum of position increments produced by the most recent addToken/addAttributeValueToken call. */
        private int posInc;

        /**
         * Looks up the delegate tokenizer for the enclosing document's
         * collection and prepares the tokenization parameters.
         *
         * @throws TokenizerException if the analysis configuration lookup
         *                            fails with a runtime exception
         */
        private DefaultResultHandler() throws TokenizerException {
            try {
                String collectionId = TokenizableDocument.this.cid;
                AnalysisScopeConfig config = ConfigurationLoader.getAnalysisScopeConfig(collectionId);
                this.tokenizer = config.getTokenizer();
            }
            catch (RuntimeException e) {
                Message msg = new Message("P6001E.CANNOT_PROCESS_CAS");
                throw new TokenizerException(msg, (Throwable)e);
            }
            this.tokParams = new TokenizationParams(true, TokenizableDocument.this.cid, TokenizableDocument.this.metadata, TokenizableDocument.this.knownLanguage, TokenizableDocument.this.defaultLanguage, null);
        }

        /** Sets the segments that token offsets are matched against. */
        void setFields(List<Segment> fields) {
            this.fields = fields;
        }

        /** Sets the anchors that token offsets are matched against. */
        void setAnchors(List<Anchor> anchors) {
            this.anchors = anchors;
        }

        /** Stores the attribute-value start offset; a value &gt;= 0 enables the "_xmlvals" redirection. */
        void setStartOfAttributeValues(int value) {
            this.startOfAttributeValues = value;
        }

        /**
         * Adds a stored, tokenized, norm-less field to the document. A field
         * named "language" additionally switches the tokenization parameters
         * to that language. Null or empty names and values are ignored.
         */
        @Override
        public void addField(String name, String value) {
            if (name == null || value == null || name.length() == 0 || value.length() == 0) {
                return;
            }
            if (TokenizableDocument.this.doc != null) {
                Field field = new Field(name, value, Field.Store.YES, Field.Index.TOKENIZED);
                field.setOmitNorms(true);
                TokenizableDocument.this.doc.add((Fieldable)field);
            }
            // Compare by value: a reference comparison only matches interned strings.
            if ("language".equals(name)) {
                this.tokParams = this.tokParams.copy(value);
            }
        }

        @Override
        public int getPositionIncrement() {
            return this.posInc;
        }

        /** Delegates to the active plain vector; 1 when there is none. */
        @Override
        public int getMinPositionIncrement() {
            if (this.activePlainVector != null) {
                return this.activePlainVector.getMinPositionIncrement();
            }
            return 1;
        }

        /** Delegates to the active plain vector; 0 when there is none. */
        @Override
        public int getSentencePositionIncrement() {
            if (this.activePlainVector != null) {
                return this.activePlainVector.getSentencePositionIncrement();
            }
            return 0;
        }

        /**
         * Routes one token to the vectors that are active for its offsets.
         *
         * @param token token produced by the tokenizer
         * @param begin start offset of the token
         * @param end   end offset of the token
         */
        @Override
        public void addToken(TToken token, int begin, int end) {
            List<TToken> tokens;
            this.posInc = 0;
            this.switchFieldIfNecessary(begin, end);
            this.switchAnchorIfNecessary(begin, end);
            TToken.Type type = token.getType();
            // In a "url" field, sentence tokens that end before the field does are dropped.
            if (this.ignoreSentenceBreak && TToken.Type.SENTENCE == type && this.currentFieldEnd > end) {
                return;
            }
            if (this.activePlainVector != null) {
                if (this.currentInSummary) {
                    // Summary text goes through the delegate tokenizer and counts towards the text length.
                    tokens = this.tokenizer.tokenize(this.tokParams, token);
                    for (TToken currTok : tokens) {
                        this.posInc += this.activePlainVector.addToken(currTok, true);
                        if (TToken.Type.ORIGINAL != currTok.getType() && TToken.Type.BOTH != currTok.getType() && TToken.Type.NGRAM != currTok.getType()) continue;
                        TokenizableDocument.this.curNumberOfTextTokens++;
                    }
                } else {
                    this.posInc += this.activePlainVector.addToken(token, this.currentInSummary);
                }
            }
            if (this.activeAnchorVector != null) {
                this.activeAnchorVector.addToken(token);
            }
            if (this.activeFieldVector != null && this.currentFieldBegin <= begin) {
                if (!this.currentField.getName().equals("title") && !this.currentField.getName().equals("filename")) {
                    if (this.currentFieldNoLemmatize) {
                        if (TToken.Type.BOTH == type) {
                            token = TToken.newToken(token, TToken.Type.ORIGINAL);
                        } else if (TToken.Type.LEMMA == type) {
                            // Nothing follows the field vector, so returning just skips the lemma.
                            return;
                        }
                    }
                    this.activeFieldVector.addToken(token);
                } else {
                    // "title" and "filename" are additionally run through the delegate tokenizer.
                    tokens = this.tokenizer.tokenize(this.tokParams, token);
                    for (TToken currTok : tokens) {
                        if (this.currentFieldNoLemmatize) {
                            // NOTE(review): this tests the type of the incoming token,
                            // not of currTok - confirm that is intended.
                            if (TToken.Type.BOTH == type) {
                                currTok = TToken.newToken(currTok, TToken.Type.ORIGINAL);
                            } else if (TToken.Type.LEMMA == type) continue;
                        }
                        this.activeFieldVector.addToken(currTok);
                    }
                }
            }
        }

        /** Adds an XML tag/attribute token to "_xmltags"/"_xmlatts" and its numeric value to "_numvals". */
        @Override
        public void addXMLToken(XMLToken xmlToken) {
            this.addBaseTagToken(xmlToken, false);
        }

        /** Adds a tag/attribute token to "_uimatags"/"_uimaatts" and its numeric value to "_uimanumvals". */
        @Override
        public void addBaseTagToken(BaseTagToken tagToken) {
            this.addBaseTagToken(tagToken, true);
        }

        /**
         * Shared implementation of the two tag-token callbacks.
         *
         * @param uima selects the "_uima..." field names when true, the
         *             "_xml..."/"_numvals" names when false
         */
        private void addBaseTagToken(BaseTagToken tagToken, boolean uima) {
            String fieldName = tagToken.isAttr() ? (uima ? "_uimaatts" : "_xmlatts") : (uima ? "_uimatags" : "_xmltags");
            this.getTokenVector(fieldName).addToken(tagToken);
            this.addNumericValue(tagToken, uima ? "_uimanumvals" : "_numvals");
        }

        /** Runs the token through the delegate tokenizer and adds the results to "_uimavals". */
        @Override
        public void addAttributeValueToken(TToken token) {
            TokenVector vec = this.getTokenVector("_uimavals");
            List<TToken> tokens = this.tokenizer.tokenize(this.tokParams, token);
            this.posInc = 0;
            for (TToken currTok : tokens) {
                this.posInc += vec.addToken(currTok, true);
            }
        }

        /** Returns the vector registered under the given name, creating and registering it if absent. */
        private TokenVector getTokenVector(String fieldName) {
            TokenVector tv = TokenizableDocument.this.fieldMap.get(fieldName);
            if (tv == null) {
                tv = new TokenVector();
                TokenizableDocument.this.fieldMap.put(fieldName, tv);
            }
            return tv;
        }

        /**
         * If the tag token carries a numeric value, adds a GENERIC token named
         * after the tag/attribute to the given field, with the value encoded
         * as a {@link Decimal} payload. The position increment is the distance
         * from the previous numeric token's begin position.
         */
        private void addNumericValue(BaseTagToken tagToken, String field) {
            Object numericValue = tagToken.getNumericValue();
            if (numericValue != null) {
                TToken numericToken = TToken.newToken(tagToken.getTagAttrName(), TToken.Type.GENERIC);
                numericToken.setPositionIncrement(tagToken.getTagAttrTokenBegin() - this.lastNumericTokenBegin);
                this.lastNumericTokenBegin = tagToken.getTagAttrTokenBegin();
                // Anything that is not a Long is treated as a Double.
                Decimal decimal = numericValue instanceof Long ? new Decimal((Long)numericValue) : new Decimal((Double)numericValue);
                numericToken.setPayload(decimal.toByteArray());
                this.getTokenVector(field).addToken(numericToken);
            }
        }

        /** Replaces the document's "_plainendpos" field with the Vint8 encoding of the given position. */
        @Override
        public void setClosingPosition(int position) {
            TokenizableDocument.this.doc.removeField("_plainendpos");
            byte[] bytes = new byte[Vint8.bytesNeeded(position)];
            Vint8.encode(position, bytes, 0);
            Field field = new Field("_plainendpos", bytes, Field.Store.YES);
            field.setOmitNorms(true);
            TokenizableDocument.this.doc.add((Fieldable)field);
        }

        /**
         * Once the token starts at or after the end of the current anchor,
         * advances to the anchor covering {@code begin}. The anchor vector
         * becomes null when there are no more anchors, when the next anchor
         * starts after {@code begin}, or when the anchor ends before
         * {@code end}; otherwise a new vector is created and attached to the
         * anchor.
         */
        private void switchAnchorIfNecessary(int begin, int end) {
            if (this.currentAnchorEnd <= begin) {
                Anchor currentAnchor = null;
                while (this.currentAnchorEnd <= begin) {
                    if (this.anchors == null || this.currentAnchorIndex >= this.anchors.size()) {
                        this.activeAnchorVector = null;
                        return;
                    }
                    currentAnchor = this.anchors.get(this.currentAnchorIndex);
                    if (currentAnchor.begin > begin) {
                        // The token lies in the gap before the next anchor; do not consume that anchor yet.
                        this.activeAnchorVector = null;
                        return;
                    }
                    this.currentAnchorEnd = currentAnchor.end;
                    ++this.currentAnchorIndex;
                }
                if (this.currentAnchorEnd < end) {
                    this.activeAnchorVector = null;
                    return;
                }
                this.activeAnchorVector = new TokenVector();
                currentAnchor.setTokenVector(this.activeAnchorVector);
            }
        }

        /**
         * Once the token starts at or after the end of the current field,
         * advances to the field covering {@code begin} and selects the plain
         * and field vectors according to that field's flags. Both vectors
         * become null when there are no more fields or the next field starts
         * after {@code begin}.
         */
        private void switchFieldIfNecessary(int begin, int end) {
            if (this.currentFieldEnd <= begin) {
                while (this.currentFieldEnd <= begin) {
                    if (this.fields == null || this.currentFieldIndex >= this.fields.size()) {
                        this.activePlainVector = null;
                        this.activeFieldVector = null;
                        return;
                    }
                    Segment nextField = this.fields.get(this.currentFieldIndex);
                    if (nextField.offset() > begin) {
                        // The token lies in the gap before the next field; do not consume that field yet.
                        this.activePlainVector = null;
                        this.activeFieldVector = null;
                        return;
                    }
                    this.currentField = nextField;
                    ++this.currentFieldIndex;
                    this.currentFieldBegin = this.currentField.offset();
                    this.currentFieldEnd = this.currentField.offset() + this.currentField.length();
                }
                this.ignoreSentenceBreak = "url".equals(this.currentField.getName());
                if (this.currentField.test(SEGMENT_FLAG_PLAIN)) {
                    this.activePlainVector = TokenizableDocument.this.plainVector;
                    // With attribute values present, "_xmlvals" text gets its own vector instead of "_plain".
                    if (this.startOfAttributeValues >= 0 && "_xmlvals".equals(this.currentField.getName())) {
                        this.activePlainVector = this.getTokenVector("_xmlvals");
                    }
                    this.currentInSummary = this.currentField.getName() == null || this.currentField.test(SEGMENT_FLAG_IN_SUMMARY);
                } else {
                    this.activePlainVector = null;
                    this.currentInSummary = false;
                }
                if (this.currentField.test(SEGMENT_FLAG_FIELD)) {
                    this.activeFieldVector = this.getTokenVector(this.currentField.getName());
                    this.currentFieldNoLemmatize = this.currentField.test(SEGMENT_FLAG_NO_LEMMATIZE);
                } else {
                    this.activeFieldVector = null;
                }
            }
        }

        /** No-op. */
        @Override
        public void close() {
        }

        /** No-op. */
        @Override
        public void reset() {
        }
    }
}

