/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer.uima;

import com.ibm.es.nuvo.tokenizer.BaseTagToken;
import com.ibm.es.nuvo.tokenizer.ResultHandler;
import com.ibm.es.nuvo.tokenizer.SentenceXMLToken;
import com.ibm.es.nuvo.tokenizer.SimpleTagToken;
import com.ibm.es.nuvo.tokenizer.SimpleTextTagToken;
import com.ibm.es.nuvo.tokenizer.TToken;
import com.ibm.es.nuvo.tokenizer.XMLToken;
import com.ibm.es.nuvo.tokenizer.uima.AbstractCASTokenizer;
import com.ibm.es.nuvo.tokenizer.uima.NgramTokenIterator;
import com.ibm.es.nuvo.tokenizer.uima.RegularTokenIterator;
import com.ibm.es.nuvo.tokenizer.uima.SentenceIterator;
import com.ibm.es.nuvo.tokenizer.uima.TokenIteratorAggregator;
import com.ibm.es.nuvo.tokenizer.uima.XMLAttrIterator;
import com.ibm.es.nuvo.tokenizer.uima.XMLLowLevelCASSupport;
import com.ibm.es.nuvo.tokenizer.uima.XMLTagIterator;
import com.ibm.uima.cas.FSIterator;
import com.ibm.uima.cas.Feature;
import com.ibm.uima.cas.FeatureStructure;
import com.ibm.uima.cas.Type;
import com.ibm.uima.cas.TypeSystem;
import com.ibm.uima.cas.text.AnnotationFS;
import com.ibm.uima.cas.text.TCAS;
import com.ibm.uima.search.IndexingException;
import com.ibm.uima.search.Style;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Stack;
import java.util.StringTokenizer;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Tokenizer that walks a {@link TCAS} and reports what it finds to a
 * {@link ResultHandler.PositionResultHandler}: the text tokens, sentence spans,
 * XML tag and attribute spans (read through the inherited {@code xll} support
 * object) and the UIMA annotations selected by the inherited compiled build items.
 *
 * <p>All working state lives in instance fields that are (re)initialised at the
 * start of {@link #extractAnnotations(TCAS, ResultHandler)} and released when it
 * returns, so a single instance must not be used for two documents at once.
 */
public class CASTokenizer
extends AbstractCASTokenizer {
    private static final String COPYRIGHT = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    /** Character offset at which the token currently being processed begins. */
    private int tokenBegin;
    /** Character offset at which the token currently being processed ends. */
    private int tokenEnd;
    /** Running token position, advanced by the handler's position increments. */
    private int tokenCount;
    /** Value of {@link #tokenCount} when the attribute-value area was first reached, or -1. */
    private int tokenCountAtStartOfAttributeValues;
    /**
     * Token position used for XML attributes. It is re-based in
     * {@link #updateXMLAnnotations(int)} whenever an attribute opens and is not
     * otherwise reset between documents.
     */
    private int lastRegularXMLTokenNumber;
    private ResultHandler.PositionResultHandler resultHandler;
    private XMLTagIterator itTags;
    /** Tags that have been opened but whose end offset has not been passed yet. */
    private Stack<XMLLowLevelCASSupport.Tag> tagStack;
    private XMLAttrIterator itAttrs;
    /** Attributes that have been opened but whose end offset has not been passed yet. */
    private Stack<XMLLowLevelCASSupport.Attr> attrStack;
    /** Sentences collected for indexing; null when sentence annotations are not indexed. */
    private LinkedList<SentenceXMLToken> sentenceList;
    private static final Class<?>[] EMPTY_CLASS_ARRAY = new Class<?>[0];
    private static final Object[] EMPTY_OBJ_ARRAY = new Object[0];
    /** Annotations that are open at the current position, ordered by end offset. */
    AnnotQueue openAnnots;
    FSIterator annotIt;
    /** Annotation fetched from {@link #annotIt} that has not been consumed yet. */
    AnnotationFS currAnnot;
    /** Serial number handed to the next annotation token. */
    int annotId;
    TypeSystem typeSystem;
    ArrayList<SimpleTextTagToken> annotTokens;
    ArrayList<SimpleTagToken> attrTokens;

    /**
     * Entry point called by the base class.
     *
     * @param tcas the CAS to read
     * @param handler the receiver of the results; it is cast to
     *        {@link ResultHandler.PositionResultHandler}, so any other handler type
     *        fails with a {@link ClassCastException}
     */
    @Override
    protected void extractAnnotations(TCAS tcas, ResultHandler handler) {
        this.extractAnnotations(tcas, (ResultHandler.PositionResultHandler)handler);
    }

    /**
     * Runs the complete extraction for one document: streams the tokens to the
     * handler while tracking sentences, XML tags/attributes and UIMA annotations,
     * then sends the collected XML tokens, annotation tokens and attribute tokens.
     *
     * <p>Any exception is reported on {@code System.err} and swallowed (best-effort
     * behaviour kept from the original). All per-document state is released in the
     * {@code finally} block.
     *
     * <p>Decompiler note (CFR): "WARNING - Removed try catching itself - possible
     * behaviour change."
     *
     * @param tcas the CAS to read
     * @param handler the receiver of all tokens and annotations
     */
    private void extractAnnotations(TCAS tcas, ResultHandler.PositionResultHandler handler) {
        try {
            String language = tcas.getDocumentLanguage();
            if (language != null) {
                handler.addField("language", language);
            }
            String docText = tcas.getDocumentText();
            boolean endPositionUnset = true;
            this.resultHandler = handler;
            this.tokenCount = 0;
            this.tokenCountAtStartOfAttributeValues = -1;
            RegularTokenIterator it1 = new RegularTokenIterator(tcas, false, this.usePuncForMathSymbol, this.usePuncForCurrencySymbol);
            NgramTokenIterator it2 = new NgramTokenIterator(tcas, false);
            TokenIteratorAggregator it = new TokenIteratorAggregator(it1, it2);
            SentenceIterator itSentences = new SentenceIterator(tcas);
            if (!itSentences.next()) {
                // No sentence at all: null marks the sentence iterator as exhausted.
                itSentences = null;
            }
            SentenceXMLToken sxt = null;
            this.itTags = new XMLTagIterator(this.xll);
            this.itTags.next();
            this.itAttrs = new XMLAttrIterator(this.xll);
            this.itAttrs.next();
            this.tagStack = new Stack<XMLLowLevelCASSupport.Tag>();
            this.attrStack = new Stack<XMLLowLevelCASSupport.Attr>();
            this.sentenceList = this.indexSentenceAnnotations ? new LinkedList<SentenceXMLToken>() : null;
            int xmlTokenNumber = 0;
            int posInc = 0;
            ArrayList<TToken> tokens = new ArrayList<TToken>();
            this.openAnnots = new AnnotQueue(10);
            this.annotTokens = new ArrayList<SimpleTextTagToken>();
            this.attrTokens = new ArrayList<SimpleTagToken>();
            this.annotIt = tcas.getAnnotationIndex().iterator();
            this.annotId = 0;
            this.currAnnot = null;
            this.tokenBegin = -1;
            this.tokenEnd = -1;
            tokenLoop: while (it.next()) {
                this.tokenBegin = it.getBegin();
                this.tokenEnd = it.getEnd();
                this.updateAnnotations();
                // Close the current sentence once this token starts at or after its end,
                // and open the next sentence when this token has reached its begin.
                while (true) {
                    if (sxt != null) {
                        if (sxt.getTagAttrEnd() > this.tokenBegin) break;
                        sxt.setTagAttrTokenEnd(this.tokenCount + 1);
                        handler.addToken(TToken.newSentenceBreak(), sxt.getTagAttrBegin(), sxt.getTagAttrEnd());
                        sxt = null;
                        this.tokenCount += handler.getPositionIncrement();
                    }
                    if (itSentences == null || itSentences.getBegin() > this.tokenBegin) break;
                    sxt = new SentenceXMLToken();
                    sxt.setTagAttrBegin(itSentences.getBegin());
                    sxt.setTagAttrEnd(itSentences.getEnd());
                    sxt.setTagAttrTokenBegin(this.tokenCount + handler.getMinPositionIncrement());
                    if (this.sentenceList != null) {
                        this.sentenceList.add(sxt);
                    }
                    if (itSentences.next()) continue;
                    itSentences = null;
                }
                xmlTokenNumber = this.tokenCount + handler.getMinPositionIncrement();
                // Remember the token count at which the attribute-value area starts (first hit only).
                if (this.tokenCountAtStartOfAttributeValues == -1 && this.startOfAttributeValues >= 0 && this.tokenBegin >= this.startOfAttributeValues) {
                    this.tokenCountAtStartOfAttributeValues = this.tokenCount;
                }
                // Report the closing position once, at the first token at or past the plain-text end.
                if (endPositionUnset && this.tokenBegin >= this.endOfPlaintextPosition) {
                    this.resultHandler.setClosingPosition(this.tokenCount + handler.getMinPositionIncrement());
                    endPositionUnset = false;
                }
                switch (it.getTokenType()) {
                    case WHITESPACE: {
                        String whiteSpace = docText.substring(this.tokenBegin, this.tokenEnd);
                        handler.addToken(TToken.newToken(whiteSpace, TToken.Type.WHITESPACE), this.tokenBegin, this.tokenEnd);
                        this.tokenCount += handler.getPositionIncrement();
                        // Whitespace does not open or close XML tags/attributes.
                        continue tokenLoop;
                    }
                    case PUNCTUATION: {
                        handler.addToken(TToken.newToken(it.getOriginal(), TToken.Type.PUNCTUATION), this.tokenBegin, this.tokenEnd);
                        this.tokenCount += handler.getPositionIncrement();
                        // Punctuation does not open or close XML tags/attributes either.
                        continue tokenLoop;
                    }
                    case NGRAM: {
                        handler.addToken(TToken.newToken(it.getOriginal(), TToken.Type.NGRAM), this.tokenBegin, this.tokenEnd);
                        posInc = handler.getPositionIncrement();
                        this.tokenCount += posInc;
                        this.lastRegularXMLTokenNumber += posInc;
                        break;
                    }
                    case REGULAR: {
                        this.generateRegularTokens(tokens, it, false);
                        posInc = 0;
                        for (TToken token : tokens) {
                            handler.addToken(token, this.tokenBegin, this.tokenEnd);
                            posInc += handler.getPositionIncrement();
                        }
                        this.tokenCount += posInc;
                        this.lastRegularXMLTokenNumber += posInc;
                    }
                }
                this.updateXMLAnnotations(xmlTokenNumber);
            }
            // NOTE(review): a sentence still open here (sxt != null) never receives its
            // token end or sentence-break token; sxt is not referenced after the loop.
            // Confirm whether that is intended.
            this.updateAnnotations();
            xmlTokenNumber = this.tokenCount + handler.getMinPositionIncrement();
            // Integer.MAX_VALUE as the token begin forces everything still open to be closed.
            this.tokenBegin = Integer.MAX_VALUE;
            this.updateAnnotations();
            this.tokenCount += handler.getMinPositionIncrement();
            this.lastRegularXMLTokenNumber += handler.getMinPositionIncrement();
            this.updateXMLAnnotations(xmlTokenNumber);
            this.sendXMLAnnotations();
            this.reorderUIMAAnnotationsAndAttrs();
            this.tokenizeAttributes();
            this.sendUIMAAnnotationsAndAttrs();
        }
        catch (Exception e) {
            // Deliberate best effort: the failure is reported but not propagated.
            System.err.println("exception in CASTOK:");
            e.printStackTrace();
        }
        finally {
            // Drop every per-document reference so the tokenizer does not keep the
            // handler or CAS-derived objects alive between documents.
            this.resultHandler = null;
            this.tagStack = null;
            this.attrStack = null;
            this.itAttrs = null;
            this.itTags = null;
            this.openAnnots = null;
            this.annotTokens = null;
            this.attrTokens = null;
            this.sentenceList = null;
            this.annotIt = null;
            this.currAnnot = null;
        }
    }

    /**
     * Assigns token positions to XML tags and attributes relative to the current
     * {@link #tokenBegin}: closes the open ones whose end offset is at or before the
     * token begin, then opens those whose begin offset is at or before it.
     *
     * <p>When {@link #tokenBegin} is {@code Integer.MAX_VALUE} (the final flush done
     * by {@code extractAnnotations}) only the closing step runs.
     *
     * @param xmlTokenNumber token position to record as begin/end for tags; attributes
     *        use this value minus {@link #tokenCountAtStartOfAttributeValues}
     */
    private void updateXMLAnnotations(int xmlTokenNumber) {
        XMLLowLevelCASSupport.Attr attr;
        XMLLowLevelCASSupport.Tag tag;
        while (!this.tagStack.isEmpty() && this.tagStack.peek().getTagAttrEnd() <= this.tokenBegin) {
            tag = this.tagStack.pop();
            tag.setTagAttrTokenEnd(xmlTokenNumber);
        }
        if (this.tokenBegin < Integer.MAX_VALUE) {
            // NOTE(review): relies on itTags.getBegin() returning a value greater than any
            // token begin once the iterator is exhausted - confirm in XMLTagIterator.
            while (this.itTags.getBegin() <= this.tokenBegin) {
                tag = this.itTags.getTag();
                tag.setTagAttrTokenBegin(xmlTokenNumber);
                if (tag.getTagAttrEnd() > this.tokenBegin) {
                    this.tagStack.push(tag);
                } else {
                    // Tag already ended before this token: it covers no token.
                    tag.setTagAttrTokenEnd(xmlTokenNumber);
                }
                this.itTags.next();
            }
        }
        while (!this.attrStack.isEmpty() && this.attrStack.peek().getTagAttrEnd() <= this.tokenBegin) {
            attr = this.attrStack.pop();
            attr.setTagAttrTokenEnd(this.lastRegularXMLTokenNumber);
        }
        if (this.tokenBegin < Integer.MAX_VALUE) {
            // NOTE(review): same exhaustion assumption as for itTags above.
            while (this.itAttrs.getBegin() <= this.tokenBegin) {
                attr = this.itAttrs.getAttr();
                // Attribute positions are counted from the start of the attribute-value area.
                this.lastRegularXMLTokenNumber = xmlTokenNumber - this.tokenCountAtStartOfAttributeValues;
                attr.setTagAttrTokenBegin(this.lastRegularXMLTokenNumber);
                if (attr.getTagAttrEnd() > this.tokenBegin) {
                    this.attrStack.push(attr);
                } else {
                    attr.setTagAttrTokenEnd(this.lastRegularXMLTokenNumber);
                }
                this.itAttrs.next();
            }
        }
    }

    /**
     * Sends the XML tags merged with the collected sentences, then the XML
     * attributes, to the result handler. Before sending, each token is told the
     * token begin of the token sent before it (0 for the first one).
     *
     * <p>In the merge a sentence goes first only when its token begin is strictly
     * smaller than the tag's. Once the tags are exhausted, sending stops at the first
     * sentence whose token begin is greater than the token end of the last tag sent;
     * while no tag has been sent yet that limit is {@code Integer.MAX_VALUE}.
     */
    private void sendXMLAnnotations() {
        Iterator<SentenceXMLToken> itSentences = this.sentenceList != null ? this.sentenceList.iterator() : null;
        SentenceXMLToken sxt = null;
        if (itSentences != null && itSentences.hasNext()) {
            sxt = itSentences.next();
        }
        this.itTags = new XMLTagIterator(this.xll);
        XMLLowLevelCASSupport.Tag tag = null;
        if (this.itTags.next()) {
            tag = this.itTags.getTag();
        }
        int previousTokenBegin = 0;
        int lastTagTokenEnd = Integer.MAX_VALUE;
        while (tag != null || sxt != null) {
            XMLToken tokenToSend;
            if (tag != null && sxt != null) {
                tokenToSend = sxt.getTagAttrTokenBegin() < tag.getTagAttrTokenBegin() ? sxt : tag;
            } else {
                tokenToSend = tag != null ? tag : sxt;
                if (tag == null && sxt.getTagAttrTokenBegin() > lastTagTokenEnd) break;
            }
            tokenToSend.setTagAttrTokenZero(previousTokenBegin);
            previousTokenBegin = tokenToSend.getTagAttrTokenBegin();
            this.resultHandler.addXMLToken(tokenToSend);
            if (tokenToSend == tag) {
                lastTagTokenEnd = tag.getTagAttrTokenEnd();
                tag = this.itTags.next() ? this.itTags.getTag() : null;
                continue;
            }
            sxt = itSentences != null && itSentences.hasNext() ? itSentences.next() : null;
        }
        previousTokenBegin = 0;
        this.itAttrs = new XMLAttrIterator(this.xll);
        while (this.itAttrs.next()) {
            XMLLowLevelCASSupport.Attr attr = this.itAttrs.getAttr();
            attr.setTagAttrTokenZero(previousTokenBegin);
            previousTokenBegin = attr.getTagAttrTokenBegin();
            this.resultHandler.addXMLToken(attr);
        }
    }

    /**
     * Closes every open annotation whose end offset is at or before {@code begin},
     * giving each the token end {@code tokenCount + minPositionIncrement}.
     *
     * @param begin character offset to test the open annotations against
     */
    private void testBeginAgainstOpenAnnots(int begin) {
        int annotTokenEnd = this.tokenCount + this.resultHandler.getMinPositionIncrement();
        while (!this.openAnnots.isEmpty() && begin >= this.openAnnots.topTagAttrEnd()) {
            SimpleTextTagToken annot = this.openAnnots.poll();
            annot.setTagAttrTokenEnd(annotTokenEnd);
        }
    }

    /**
     * Brings the annotation state up to the current token: closes the annotations
     * that ended at or before {@link #tokenBegin}, then consumes from
     * {@link #annotIt} every annotation that begins before {@link #tokenEnd} and
     * registers it with the token begin {@code tokenCount + minPositionIncrement}.
     *
     * @throws IndexingException propagated from {@code addAnnotation}
     */
    private void updateAnnotations() throws IndexingException {
        this.testBeginAgainstOpenAnnots(this.tokenBegin);
        int annotTokenBegin = this.tokenCount + this.resultHandler.getMinPositionIncrement();
        while (this.currAnnot != null || this.annotIt.isValid()) {
            if (this.currAnnot == null) {
                this.currAnnot = (AnnotationFS)this.annotIt.get();
            }
            int begin = this.currAnnot.getBegin();
            int end = this.currAnnot.getEnd();
            // The annotation starts after the current token: keep it in currAnnot for later.
            if (begin >= this.tokenEnd) break;
            this.testBeginAgainstOpenAnnots(begin);
            this.addAnnotation(annotTokenBegin, this.currAnnot, begin, end);
            this.currAnnot = null;
            this.annotIt.moveToNext();
        }
    }

    /**
     * Convenience overload that takes the offsets from the annotation itself.
     *
     * @param annotTokenBegin token position at which the annotation starts
     * @param annot the annotation to register
     * @throws IndexingException see the four-argument overload
     */
    private void addAnnotation(int annotTokenBegin, AnnotationFS annot) throws IndexingException {
        this.addAnnotation(annotTokenBegin, annot, annot.getBegin(), annot.getEnd());
    }

    /**
     * Registers {@code annot} once for every "Annotation" style of every compiled
     * build item that matches it: creates a {@link SimpleTextTagToken} with the next
     * serial, adds it to the open queue and to {@link #annotTokens}, and collects the
     * mapped feature values as attribute tokens.
     *
     * @param annotTokenBegin token position at which the annotation starts
     * @param annot the annotation to register
     * @param begin begin character offset stored on the new token
     * @param end end character offset stored on the new token
     * @throws IndexingException if a matching rule carries a "Term" or "Breaking"
     *         style, or if the annotation name cannot be resolved
     */
    private void addAnnotation(int annotTokenBegin, AnnotationFS annot, int begin, int end) throws IndexingException {
        for (AbstractCASTokenizer.CompiledIndexBuildItem item : this.compiledItems) {
            if (!item.matches(annot)) continue;
            Style[] styles = item.rule.getStyles();
            for (Style style : styles) {
                if ("Term".equals(style.getName())) {
                    throw new IndexingException(new Throwable("Token prohibited in CAS - using LanguageWare tokenization"));
                }
                if ("Breaking".equals(style.getName())) {
                    throw new IndexingException(new Throwable("Sentence break prohibited in CAS - using LanguageWare tokenization"));
                }
                if (!"Annotation".equals(style.getName())) continue;
                // NOTE(review): throws NullPointerException if the resolved name is null
                // (e.g. an unset name feature) - confirm whether that can happen.
                String annotName = CASTokenizer.getAnnotationName(annot, style, item).toLowerCase();
                SimpleTextTagToken newAnnot = new SimpleTextTagToken(annotName, this.annotId, annotTokenBegin);
                newAnnot.setTagAttrBegin(begin);
                newAnnot.setTagAttrEnd(end);
                this.openAnnots.add(newAnnot);
                this.annotTokens.add(newAnnot);
                // Attribute tokens get the current annotId, so this must run before the increment.
                this.handleAttributes(annot, item.attributeMappings);
                ++this.annotId;
            }
        }
    }

    /**
     * Splits every attribute value on whitespace, sends the pieces to the handler as
     * attribute-value tokens and records the resulting token range on the attribute
     * token. Positions start at 1 and advance by the handler's sentence position
     * increment between attributes.
     */
    private void tokenizeAttributes() {
        int pos = 1;
        for (SimpleTagToken tagToken : this.attrTokens) {
            tagToken.setTagAttrTokenBegin(pos);
            Object rawValue = tagToken.getAttributeValue();
            if (rawValue != null) {
                String value = rawValue.toString();
                if (value != null && value.length() > 0) {
                    StringTokenizer sTok = new StringTokenizer(value);
                    while (sTok.hasMoreTokens()) {
                        this.resultHandler.addAttributeValueToken(TToken.newToken(sTok.nextToken(), TToken.Type.ORIGINAL));
                        pos += this.resultHandler.getPositionIncrement();
                    }
                }
            }
            tagToken.setTagAttrTokenEnd(pos);
            pos += this.resultHandler.getSentencePositionIncrement();
        }
    }

    /**
     * Reverses, in place, every run of consecutive annotation tokens that share the
     * same token begin, together with the attribute tokens belonging to that run.
     * Does nothing when there are no annotation tokens.
     */
    private void reorderUIMAAnnotationsAndAttrs() {
        if (this.annotTokens.isEmpty()) {
            return;
        }
        SimpleTextTagToken tag = this.annotTokens.get(0);
        int currBegin = tag.getTagAttrTokenBegin();
        int beginIndex = 0;
        int beginAttrIndex = 0;
        for (int i = 1; i < this.annotTokens.size(); ++i) {
            tag = this.annotTokens.get(i);
            if (tag.getTagAttrTokenBegin() == currBegin) continue;
            // A new token begin starts here: reverse the run [beginIndex, i).
            beginAttrIndex = this.reverseOrder(beginIndex, i, beginAttrIndex);
            beginIndex = i;
            currBegin = tag.getTagAttrTokenBegin();
        }
        this.reverseOrder(beginIndex, this.annotTokens.size(), beginAttrIndex);
    }

    /**
     * Reverses the annotation tokens in {@code [start, end)} and the attribute
     * tokens from {@code attrStart} up to the first one whose serial exceeds the
     * serial of the last annotation in the range. Every serial in both ranges is
     * then replaced by {@code firstSerial + lastSerial - serial}.
     *
     * @param start first annotation index (inclusive)
     * @param end last annotation index (exclusive); must be greater than {@code start}
     * @param attrStart first attribute index of the run (inclusive)
     * @return the attribute index following the run, i.e. the next run's {@code attrStart}
     */
    private int reverseOrder(int start, int end, int attrStart) {
        int attrEnd;
        int serial = this.annotTokens.get(end - 1).getTagAttrSerial();
        int serialSum = this.annotTokens.get(start).getTagAttrSerial() + serial;
        // Find the end of the attribute tokens whose serial is within this run.
        for (attrEnd = attrStart; attrEnd < this.attrTokens.size() && this.attrTokens.get(attrEnd).getTagAttrSerial() <= serial; ++attrEnd) {
        }
        this.reverseOrder(this.annotTokens, start, end);
        this.reverseOrder(this.attrTokens, attrStart, attrEnd);
        this.resetSerials(this.annotTokens, start, end, serialSum);
        this.resetSerials(this.attrTokens, attrStart, attrEnd, serialSum);
        return attrEnd;
    }

    /**
     * Replaces the serial of every token in {@code [start, end)} by
     * {@code serialSum - serial}.
     *
     * @param tokens the list to update in place
     * @param start first index (inclusive)
     * @param end last index (exclusive)
     * @param serialSum sum of the first and last serial of the run
     */
    private void resetSerials(List<? extends SimpleTagToken> tokens, int start, int end, int serialSum) {
        for (int i = start; i < end; ++i) {
            SimpleTagToken tag = tokens.get(i);
            tag.setTagAttrTokenSerial(serialSum - tag.getTagAttrSerial());
        }
    }

    /** Sends all annotation tokens, then all attribute tokens, to the result handler. */
    private void sendUIMAAnnotationsAndAttrs() {
        this.sendUIMATagTokens(this.annotTokens);
        this.sendUIMATagTokens(this.attrTokens);
    }

    /**
     * Sends the tokens in list order; before sending, each token is told the token
     * begin of the token sent before it (0 for the first one).
     *
     * @param tags the tokens to send
     */
    private void sendUIMATagTokens(List<? extends BaseTagToken> tags) {
        int previousTokenBegin = 0;
        for (BaseTagToken baseTagToken : tags) {
            baseTagToken.setTagAttrTokenZero(previousTokenBegin);
            previousTokenBegin = baseTagToken.getTagAttrTokenBegin();
            this.resultHandler.addBaseTagToken(baseTagToken);
        }
    }

    /**
     * Creates attribute tokens for the features of {@code annot} that are listed in
     * {@code attributeMappings} and have a non-blank string value. Primitive features
     * yield one token; arrays of primitives yield one token per non-null element of
     * the reflective {@code toStringArray()} result; other non-array values are walked
     * reflectively through {@code getHead()}/{@code getTail()} when {@code getHead()}
     * returns a primitive. The tokens added by this call are reversed at the end.
     *
     * <p>Reflection failures are printed and the feature is skipped.
     *
     * @param annot the annotation whose features are read
     * @param attributeMappings feature short name to attribute name; null or empty
     *        means nothing is indexed
     */
    private void handleAttributes(AnnotationFS annot, Map<String, String> attributeMappings) {
        if (attributeMappings == null || attributeMappings.isEmpty()) {
            return;
        }
        Type type = annot.getType();
        List<?> features = type.getFeatures();
        Iterator<?> iter = features.iterator();
        int oldSize = this.attrTokens.size();
        while (iter.hasNext()) {
            Feature feat = (Feature)iter.next();
            String valueAsString = annot.getFeatureValueAsString(feat);
            if (valueAsString == null || valueAsString.trim().equals("")) continue;
            String featName = feat.getShortName();
            if (!attributeMappings.containsKey(featName)) continue;
            String attNameToIndex = attributeMappings.get(featName).toLowerCase();
            Type rangeType = feat.getRange();
            if (rangeType.isPrimitive()) {
                this.addAttrToken(attNameToIndex, valueAsString);
                continue;
            }
            FeatureStructure fSt = annot.getFeatureValue(feat);
            if (fSt == null) continue;
            if (rangeType.isArray() && rangeType.getComponentType().isPrimitive()) {
                try {
                    // getMethod never returns null; a missing method raises NoSuchMethodException.
                    Method m = fSt.getClass().getMethod("toStringArray", EMPTY_CLASS_ARRAY);
                    String[] values = (String[])m.invoke(fSt, EMPTY_OBJ_ARRAY);
                    for (String value : values) {
                        if (value == null) continue;
                        this.addAttrToken(attNameToIndex, value);
                    }
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
                continue;
            }
            if (rangeType.isArray()) continue;
            try {
                Method mHead = fSt.getClass().getMethod("getHead", EMPTY_CLASS_ARRAY);
                Method mTail = fSt.getClass().getMethod("getTail", EMPTY_CLASS_ARRAY);
                if (!mHead.getReturnType().isPrimitive()) continue;
                Class<?> headOwner = mHead.getDeclaringClass();
                Class<?> tailOwner = mTail.getDeclaringClass();
                // Stop at the first node the two methods cannot be invoked on. Without
                // this guard Method.invoke raises IllegalArgumentException for such a
                // node and the walk only ends through the catch below.
                while (fSt != null && headOwner.isInstance(fSt) && tailOwner.isInstance(fSt)) {
                    Object value = mHead.invoke(fSt, EMPTY_OBJ_ARRAY);
                    if (value != null) {
                        this.addAttrToken(attNameToIndex, value.toString());
                    }
                    fSt = (FeatureStructure)mTail.invoke(fSt, EMPTY_OBJ_ARRAY);
                }
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        }
        this.reverseOrder(this.attrTokens, oldSize, this.attrTokens.size());
    }

    /**
     * Appends an attribute token that carries the current {@link #annotId} as serial.
     *
     * @param attNameToIndex attribute name
     * @param value attribute value
     */
    private void addAttrToken(String attNameToIndex, String value) {
        SimpleTagToken newAttr = new SimpleTagToken(attNameToIndex, this.annotId);
        newAttr.setAttributeValue(value);
        this.attrTokens.add(newAttr);
    }

    /**
     * Reverses the elements of {@code list} in {@code [start, end)} in place.
     *
     * @param list the list to modify
     * @param start first index (inclusive)
     * @param end last index (exclusive)
     */
    private <E> void reverseOrder(List<E> list, int start, int end) {
        --end;
        while (start < end) {
            E temp = list.get(start);
            list.set(start, list.get(end));
            list.set(end, temp);
            ++start;
            --end;
        }
    }

    /**
     * Resolves the name under which an annotation is indexed: the style's
     * {@code fixedName} attribute if set, else the string value of the feature named
     * by the style's {@code nameAttribute} attribute, else the short name of the
     * annotation type.
     *
     * @param annot the annotation being indexed
     * @param style the "Annotation" style that is being applied
     * @param buildItem the build item the style belongs to (used in error messages)
     * @return the annotation name; for the {@code nameAttribute} case this is whatever
     *         {@code getStringValue} returns
     * @throws IndexingException if both attributes are set, or if the named feature
     *         does not exist on the annotation type
     */
    private static String getAnnotationName(AnnotationFS annot, Style style, AbstractCASTokenizer.CompiledIndexBuildItem buildItem) throws IndexingException {
        String annotationName;
        String fixedNameAttr = style.getAttribute("fixedName");
        String nameAttrAttr = style.getAttribute("nameAttribute");
        if (fixedNameAttr != null && nameAttrAttr != null) {
            throw new IndexingException("invalid_attribute_combination_in_build_item", new Object[]{buildItem.name, "fixedName, nameAttribute"});
        }
        if (fixedNameAttr != null) {
            annotationName = fixedNameAttr;
        } else if (nameAttrAttr != null) {
            Feature f = annot.getType().getFeatureByBaseName(nameAttrAttr);
            if (f == null) {
                throw new IndexingException("unknown_feature_in_build_item", new Object[]{nameAttrAttr, buildItem.name});
            }
            annotationName = annot.getStringValue(f);
        } else {
            annotationName = annot.getType().getShortName();
        }
        return annotationName;
    }

    /**
     * Three-way comparison of two ints that cannot overflow (unlike {@code a - b}).
     *
     * @return a negative value, zero or a positive value as {@code a} is less than,
     *         equal to or greater than {@code b}
     */
    private static int compareInts(int a, int b) {
        return a < b ? -1 : (a > b ? 1 : 0);
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Priority queue of open annotations ordered by end offset, then begin offset,
     * then serial, so the head is always the annotation that closes first.
     */
    class AnnotQueue
    extends PriorityQueue<SimpleTextTagToken> {
        public AnnotQueue(int initialCapacity) {
            super(initialCapacity, new Comparator<SimpleTextTagToken>(){

                @Override
                public int compare(SimpleTextTagToken o1, SimpleTextTagToken o2) {
                    int result = CASTokenizer.compareInts(o1.getTagAttrEnd(), o2.getTagAttrEnd());
                    if (result != 0) {
                        return result;
                    }
                    result = CASTokenizer.compareInts(o1.getTagAttrBegin(), o2.getTagAttrBegin());
                    if (result != 0) {
                        return result;
                    }
                    return CASTokenizer.compareInts(o1.getTagAttrSerial(), o2.getTagAttrSerial());
                }
            });
        }

        /**
         * @return the end offset of the head of the queue, or
         *         {@code Integer.MAX_VALUE} when the queue is empty
         */
        protected int topTagAttrEnd() {
            if (this.size() > 0) {
                return this.peek().getTagAttrEnd();
            }
            return Integer.MAX_VALUE;
        }
    }
}

