/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer.annotators.ngram;

import com.ibm.es.nuvo.tokenizer.annotators.ngram.AnnotatorBase;
import com.ibm.es.nuvo.tokenizer.annotators.ngram.Constants;
import com.ibm.es.nuvo.tokenizer.annotators.ngram.Dictionary;
import com.ibm.es.nuvo.tokenizer.annotators.ngram.DictionaryResource;
import com.ibm.es.nuvo.tokenizer.annotators.ngram.NgramTokenizer;
import com.ibm.es.nuvo.tokenizer.annotators.ngram.NgramTokenizerConfig;
import com.ibm.uima.analysis_engine.ResultSpecification;
import com.ibm.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorContext;
import com.ibm.uima.analysis_engine.annotator.AnnotatorContextException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorInitializationException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorProcessException;
import com.ibm.uima.analysis_engine.annotator.TextAnnotator;
import com.ibm.uima.cas.Feature;
import com.ibm.uima.cas.Type;
import com.ibm.uima.cas.TypeSystem;
import com.ibm.uima.cas.impl.FSIndexRepositoryImpl;
import com.ibm.uima.cas.impl.LowLevelCAS;
import com.ibm.uima.cas.impl.LowLevelIterator;
import com.ibm.uima.cas.impl.TCASImpl;
import com.ibm.uima.cas.text.TCAS;
import com.ibm.uima.tttypesystem.TokenProperties;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class NgramAnnotator
extends AnnotatorBase
implements TextAnnotator,
Constants {
    // Copyright notice kept as a string constant; not read anywhere in this class.
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    // Dictionaries already loaded by loadDictionary(), keyed by trimmed language; recreated in init().
    private Map<String, Dictionary> dictionaries;
    // CAS and index repository of the document currently being processed; assigned in process().
    private TCASImpl cas;
    private FSIndexRepositoryImpl ir;
    // Annotator context handed to initialize(); source of configuration values and resources.
    private AnnotatorContext context;
    // Read from "UseRelativeTokenAndSentenceNumbers"; when true, RangeAnnotator.add() resets tokenNumber to 1.
    private boolean useRelativeTokenAndSentenceNumbers = false;
    // Derived from the "AnnotatorMode" configuration parameter in init().
    private Mode mode;
    // Low-level feature codes resolved in typeSystemInit().
    private int startFeatCode;
    private int endFeatCode;
    private int normCovTextFeatCode;
    // 0 when the optional DocumentAnnotation:useNgramForCJK feature is not in the type system.
    private int useNgramForCJKFeatureCode;
    // Recomputed at the start of every process() call.
    private boolean isTokenRequired = true;
    // Low-level type codes resolved in typeSystemInit().
    private int tokenTypeCode;
    private int ngramTokenTypeCode;
    // Set in process() when the result specification asks for token numbers.
    private boolean isTokenNumbersRequired = false;
    // 0 when the optional TokenAnnotation:tokenNumber feature is not in the type system.
    private int tokenNumFeatCode;
    private int tokenPropsFeatCode;
    private int tokenTypeFeatCode;
    private int ngramTokenTypeFeatCode;
    // Running token number: set to 1 in process(), incremented by tokenEntry().
    private int tokenNumber;
    private int documentTypeCode;
    private int languageFeatureCode;
    // Configuration built in init() from the non-group parameters; copied for each language.
    private NgramTokenizerConfig defaultConfig;
    // Emitters for sentence and paragraph annotations; created in typeSystemInit().
    private RangeAnnotator sentence;
    private RangeAnnotator paragraph;
    // Callback object passed to NgramTokenizer.process().
    private Handler handler = new Handler();
    // Tokenizer instances keyed by their configuration; filled lazily in process().
    private Map<NgramTokenizerConfig, NgramTokenizer> tokenizers;
    // Per-language configurations keyed by the lower-cased, trimmed language.
    private Map<String, NgramTokenizerConfig> tokenizerConfig;

    /**
     * Creates one token feature structure in the current CAS, fills in its
     * features and adds it to the index repository.
     *
     * <p>Tokens flagged as n-grams are stored with the n-gram token type and may
     * carry normalized covered text; all other tokens are stored with the plain
     * token type together with their property bits and, when requested, the
     * running token number. The running token number is advanced in both cases.
     *
     * @param start       begin offset written to the annotation
     * @param end         end offset written to the annotation
     * @param prop        flags describing the token (n-gram / special / numeric)
     * @param coveredText normalized text for n-gram tokens, or {@code null} for none
     */
    private void tokenEntry(int start, int end, TokenProperties prop, char[] coveredText) {
        final int fs;
        if (!prop.hasNgram()) {
            // Plain token.
            fs = cas.ll_createFS(tokenTypeCode, false);
            cas.ll_setIntValue(fs, startFeatCode, start, false);
            cas.ll_setIntValue(fs, endFeatCode, end, false);
            if (prop.hasSpecial()) {
                // NOTE(review): 600 / 1 are token-type codes whose meaning is defined elsewhere.
                final int specialKind = prop.hasNumeric() ? 600 : 1;
                cas.ll_setIntValue(fs, tokenTypeFeatCode, specialKind, false);
            }
            if (isTokenNumbersRequired) {
                cas.ll_setIntValue(fs, tokenNumFeatCode, tokenNumber, false);
            }
            cas.ll_setIntValue(fs, tokenPropsFeatCode, prop.getInt(), false);
        } else {
            // N-gram token.
            fs = cas.ll_createFS(ngramTokenTypeCode, false);
            cas.ll_setIntValue(fs, startFeatCode, start, false);
            cas.ll_setIntValue(fs, endFeatCode, end, false);
            if (prop.hasSpecial()) {
                cas.ll_setIntValue(fs, ngramTokenTypeFeatCode, 1, false);
            }
            if (coveredText != null) {
                cas.ll_setCharBufferValue(fs, normCovTextFeatCode, coveredText, 0, coveredText.length, false);
            }
        }
        ir.ll_addFS(fs, false);
        tokenNumber++;
    }

    /**
     * Returns the external dictionary for a language, loading it on first use.
     *
     * <p>The language is normalized with {@code trimLanguage} and looked up in the
     * local cache first. On a miss the "ExternalDictionary" resource is requested
     * from the annotator context; a successfully loaded dictionary is cached.
     * Loading is best effort: a missing resource or a context failure simply means
     * that no dictionary is used.
     *
     * @param lang document language, may be {@code null}
     * @return the dictionary for the language, or {@code null} if {@code lang} is
     *         {@code null} or no dictionary could be obtained
     */
    private final Dictionary loadDictionary(String lang) {
        if (lang == null) {
            return null;
        }
        final String key = NgramAnnotator.trimLanguage(lang);
        Dictionary dict = this.dictionaries.get(key);
        if (dict != null) {
            return dict;
        }
        try {
            // The context may have no resource bound for this language; guard against
            // a null result instead of failing with a NullPointerException.
            DictionaryResource resource = (DictionaryResource)this.context.getResourceObject("ExternalDictionary", new String[]{key});
            if (resource != null) {
                dict = resource.get();
            }
            if (dict != null) {
                this.dictionaries.put(key, dict);
            }
        }
        catch (AnnotatorContextException ignored) {
            // Deliberately ignored: the dictionary is optional, tokenization proceeds without it.
        }
        return dict;
    }

    /**
     * Decides whether processing of the given CAS should be skipped.
     *
     * <p>Never skips when the annotator mode is {@code UNKNOWN}. Otherwise the
     * document annotation is read through the low-level API and processing is
     * skipped only when the document language starts with "ja", "zh" or "ko"
     * (case-insensitive) and the {@code useNgramForCJK} value of the document
     * annotation is 0.
     *
     * @param tcas the CAS about to be processed
     * @return {@code true} if {@code process} should return without doing anything
     */
    private boolean checkWhitneyControlflowForExit(TCAS tcas) {
        if (this.mode == Mode.UNKNOWN) {
            return false;
        }
        final LowLevelCAS lowLevel = tcas.getLowLevelCAS();
        final LowLevelIterator docIter = lowLevel.ll_getIndexRepository().ll_getIndex("AnnotationIndex", this.documentTypeCode).ll_iterator();
        // NOTE(review): the iterator is dereferenced without a validity check;
        // presumably a document annotation is always present - confirm.
        final int docAddr = docIter.ll_get();
        final String language = lowLevel.ll_getStringValue(docAddr, this.languageFeatureCode);
        if (language == null || language.length() < 2) {
            return false;
        }
        final String prefix = language.substring(0, 2).toLowerCase();
        final boolean cjk = prefix.equals("ja") || prefix.equals("zh") || prefix.equals("ko");
        if (!cjk) {
            return false;
        }
        // NOTE(review): useNgramForCJKFeatureCode is 0 when the feature is missing from
        // the type system; the value is still read here - confirm intended behaviour.
        return lowLevel.ll_getIntValue(docAddr, this.useNgramForCJKFeatureCode) == 0;
    }

    /**
     * Tokenizes the document text of the given CAS and writes token, sentence and
     * paragraph annotations as requested.
     *
     * <p>Steps:
     * <ol>
     *   <li>Return immediately if {@code checkWhitneyControlflowForExit} says so.</li>
     *   <li>Derive from the result specification and the annotator mode which
     *       annotation kinds and number features are wanted. These flags are
     *       recomputed on every call so that one call's result specification
     *       never leaks into the next.</li>
     *   <li>Pick (or lazily create) the tokenizer configuration and tokenizer for
     *       the document language and run the tokenizer with this annotator's
     *       handler and the optional dictionary.</li>
     *   <li>Close the last open sentence and paragraph at the end of the text.</li>
     * </ol>
     *
     * @param aCas       CAS holding the document to process
     * @param resultSpec result specification naming the requested types/features
     * @throws AnnotatorProcessException declared by the annotator interface
     */
    public void process(TCAS aCas, ResultSpecification resultSpec) throws AnnotatorProcessException {
        if (this.checkWhitneyControlflowForExit(aCas)) {
            return;
        }
        final boolean modeKnown = this.mode != Mode.UNKNOWN;
        this.isTokenRequired = resultSpec.containsType("uima.tt.TokenAnnotation") || modeKnown;
        // Recompute instead of only ever switching on: previously the flag stayed true
        // for all later documents once a single call had requested token numbers.
        this.isTokenNumbersRequired = this.tokenNumFeatCode != 0 && resultSpec.containsFeature("uima.tt.TokenAnnotation:tokenNumber");

        boolean isSentenceRequired = modeKnown;
        boolean isSentenceNumbersRequired = false;
        if (resultSpec.containsType("uima.tt.SentenceAnnotation")) {
            isSentenceRequired = true;
            if (resultSpec.containsFeature("uima.tt.SentenceAnnotation:sentenceNumber")) {
                isSentenceNumbersRequired = true;
            }
        }
        this.sentence.init();
        this.sentence.setRequired(isSentenceRequired);
        this.sentence.setNumberRequired(isSentenceNumbersRequired);

        boolean isParagraphRequired = false;
        boolean isParagraphNumbersRequired = false;
        if (resultSpec.containsType("uima.tt.ParagraphAnnotation")) {
            isParagraphRequired = true;
            if (resultSpec.containsFeature("uima.tt.ParagraphAnnotation:paragraphNumber")) {
                isParagraphNumbersRequired = true;
            }
        }
        this.paragraph.init();
        this.paragraph.setRequired(isParagraphRequired);
        this.paragraph.setNumberRequired(isParagraphNumbersRequired);

        this.cas = (TCASImpl)aCas;
        this.ir = (FSIndexRepositoryImpl)aCas.getIndexRepository();
        if (!(this.isTokenRequired || isSentenceRequired || isParagraphRequired)) {
            return;
        }

        // Nothing to tokenize when the CAS carries no document text.
        final String text = aCas.getDocumentText();
        if (text == null) {
            return;
        }

        final String docLanguage = aCas.getDocumentLanguage();
        final Dictionary dic = this.loadDictionary(docLanguage);
        // loadDictionary() tolerates a null language; do the same here and fall back
        // to an empty key, which resolves to a copy of the default configuration.
        final String languageKey = docLanguage == null ? "" : NgramAnnotator.trimLanguage(docLanguage).toLowerCase();
        this.tokenNumber = 1;

        NgramTokenizerConfig config = this.tokenizerConfig.get(languageKey);
        if (config == null) {
            config = new NgramTokenizerConfig(this.defaultConfig);
            this.tokenizerConfig.put(languageKey, config);
        }
        NgramTokenizer tokenizer = this.tokenizers.get(config);
        if (tokenizer == null) {
            tokenizer = new NgramTokenizer(config);
            this.tokenizers.put(config, tokenizer);
        }

        tokenizer.process(text.toCharArray(), this.handler, dic);

        // Flush the trailing sentence and paragraph, if any text is still uncovered.
        final int lastChar = text.length();
        this.sentence.add(lastChar);
        this.paragraph.add(lastChar);
    }

    /**
     * Remembers the annotator context and builds the configuration from it.
     *
     * @param ctx context supplying configuration parameters and resources
     * @throws AnnotatorInitializationException declared by the annotator interface
     * @throws AnnotatorConfigurationException  if reading the configuration fails
     */
    public void initialize(AnnotatorContext ctx) throws AnnotatorInitializationException, AnnotatorConfigurationException {
        context = ctx;
        init();
    }

    /**
     * (Re)builds all configuration-derived state from the annotator context.
     *
     * <p>Resets the tokenizer, configuration and dictionary caches, reads the
     * annotator mode and the default tokenizer settings, and then creates one
     * configuration per configuration group. Each group configuration starts as a
     * copy of the default and overrides only the parameters the group defines. It
     * is registered under the group's trimmed, lower-cased name.
     *
     * @throws AnnotatorConfigurationException if the context reports an error
     *         while configuration values are read
     */
    public void init() throws AnnotatorConfigurationException {
        try {
            this.tokenizers = new HashMap<NgramTokenizerConfig, NgramTokenizer>();
            this.tokenizerConfig = new HashMap<String, NgramTokenizerConfig>();
            this.dictionaries = new WeakHashMap<String, Dictionary>();

            // Annotator mode: anything other than "indexer"/"runtime" counts as UNKNOWN.
            final String modeName = NgramAnnotator.safeGetConfigParameterValue(this.context, "AnnotatorMode", (String)null);
            if ("indexer".equalsIgnoreCase(modeName)) {
                this.mode = Mode.INDEXER;
            } else if ("runtime".equalsIgnoreCase(modeName)) {
                this.mode = Mode.RUNTIME;
            } else {
                this.mode = Mode.UNKNOWN;
            }
            this.useRelativeTokenAndSentenceNumbers = NgramAnnotator.safeGetConfigParameterValue(this.context, "UseRelativeTokenAndSentenceNumbers", false);

            // Default configuration shared by every language without its own group.
            final int baseGram = NgramAnnotator.safeGetConfigParameterValue(this.context, "NgramCount", 2);
            final String[] baseNgramChars = NgramAnnotator.safeGetConfigParameterValue(this.context, "NgramCharacters", DEFAULT_NGRAM_CHARS);
            final String[] baseIgnoreWhitespace = NgramAnnotator.safeGetConfigParameterValue(this.context, "IgnoreWhitespace", DEFAULT_IGNORE_WHITESPACE);
            this.defaultConfig = new NgramTokenizerConfig();
            final NgramTokenizerConfig base = this.defaultConfig;
            base.setIgnoreWhitespaceCategories(NgramAnnotator.getCategories(baseIgnoreWhitespace));
            base.setGram(baseGram);
            base.setNgramCategories(NgramAnnotator.getCategories(baseNgramChars));
            base.setIgnorePunctuation(NgramAnnotator.safeGetConfigParameterValue(this.context, "IgnorePunctuationTokens", false));
            base.setIgnoreSentenceBreakers(NgramAnnotator.safeGetConfigParameterValue(this.context, "IgnoreSentenceBreakers", false));
            if (this.mode == Mode.RUNTIME) {
                base.setRuntimeConfig(new NgramTokenizerConfig.RuntimeConfig());
            }

            // Per-group overrides on top of a copy of the default configuration.
            for (final String group : this.context.getConfigurationGroupNames()) {
                final NgramTokenizerConfig groupConfig = new NgramTokenizerConfig(base);
                final String languageKey = NgramAnnotator.trimLanguage(group).toLowerCase();

                final Integer gram = (Integer)this.context.getConfigParameterValue(group, "NgramCount");
                if (gram != null) {
                    groupConfig.setGram(gram);
                }
                final String[] whitespace = (String[])this.context.getConfigParameterValue(group, "IgnoreWhitespace");
                if (whitespace != null) {
                    groupConfig.setIgnoreWhitespaceCategories(NgramAnnotator.getCategories(whitespace));
                }
                final String[] ngramChars = (String[])this.context.getConfigParameterValue(group, "NgramCharacters");
                if (ngramChars != null) {
                    groupConfig.setNgramCategories(NgramAnnotator.getCategories(ngramChars));
                }
                final Boolean skipPunctuation = (Boolean)this.context.getConfigParameterValue(group, "IgnorePunctuationTokens");
                if (skipPunctuation != null) {
                    groupConfig.setIgnorePunctuation(skipPunctuation);
                }
                final Boolean skipSentenceBreakers = (Boolean)this.context.getConfigParameterValue(group, "IgnoreSentenceBreakers");
                if (skipSentenceBreakers != null) {
                    groupConfig.setIgnoreSentenceBreakers(skipSentenceBreakers);
                }
                this.tokenizerConfig.put(languageKey, groupConfig);
            }
        }
        catch (AnnotatorContextException e) {
            throw new AnnotatorConfigurationException((Throwable)e);
        }
    }

    /**
     * Resolves the low-level type and feature codes this annotator writes to.
     *
     * <p>Token, n-gram token, begin/end, document annotation and document language
     * are looked up with the "required" flag set to {@code true}. Sentence and
     * paragraph types, their number features, the token number feature and
     * {@code useNgramForCJK} are looked up with the flag set to {@code false};
     * when such a lookup yields {@code null} the corresponding code is left at 0.
     * The sentence and paragraph emitters are (re)created at the end.
     *
     * @param ts type system to resolve names against
     * @throws AnnotatorInitializationException propagated from the lookups
     * @throws AnnotatorConfigurationException  propagated from the lookups
     */
    public void typeSystemInit(TypeSystem ts) throws AnnotatorInitializationException, AnnotatorConfigurationException {
        // Lookups with the "required" flag set.
        this.tokenTypeCode = getTypeCode(initType("uima.tt.TokenAnnotation", ts, true));
        this.tokenPropsFeatCode = getFeatCode(initFeature("uima.tt.TokenAnnotation:tokenProperties", ts, true));
        this.tokenTypeFeatCode = getFeatCode(initFeature("uima.tt.TokenLikeAnnotation:frost_TokenType", ts, true));
        this.ngramTokenTypeFeatCode = getFeatCode(initFeature("uima.tt.NgramTokenAnnotation:ngram_TokenType", ts, true));
        this.startFeatCode = getFeatCode(initFeature("uima.tcas.Annotation:begin", ts, true));
        this.endFeatCode = getFeatCode(initFeature("uima.tcas.Annotation:end", ts, true));
        this.ngramTokenTypeCode = getTypeCode(initType("uima.tt.NgramTokenAnnotation", ts, true));
        this.normCovTextFeatCode = getFeatCode(initFeature("uima.tt.NgramTokenAnnotation:normalizedCoveredText", ts, true));

        // Optional sentence type; its number feature is only looked up when the type exists.
        final Type sentenceType = initType("uima.tt.SentenceAnnotation", ts, false);
        final int sentenceTypeCode = sentenceType == null ? 0 : getTypeCode(sentenceType);
        final Feature sentenceNumFeat = sentenceType == null ? null : initFeature("uima.tt.SentenceAnnotation:sentenceNumber", ts, false);
        final int sentenceNumFeatCode = sentenceNumFeat == null ? 0 : getFeatCode(sentenceNumFeat);

        // Optional paragraph type, handled the same way.
        final Type paragraphType = initType("uima.tt.ParagraphAnnotation", ts, false);
        final int paragraphTypeCode = paragraphType == null ? 0 : getTypeCode(paragraphType);
        final Feature paragraphNumFeat = paragraphType == null ? null : initFeature("uima.tt.ParagraphAnnotation:paragraphNumber", ts, false);
        final int paragraphNumFeatCode = paragraphNumFeat == null ? 0 : getFeatCode(paragraphNumFeat);

        // Optional token number feature.
        final Feature tokenNumFeat = initFeature("uima.tt.TokenAnnotation:tokenNumber", ts, false);
        this.tokenNumFeatCode = tokenNumFeat == null ? 0 : getFeatCode(tokenNumFeat);

        // Document annotation, its language, and the optional CJK switch.
        this.documentTypeCode = getTypeCode(initType("uima.tcas.DocumentAnnotation", ts, true));
        this.languageFeatureCode = getFeatCode(initFeature("uima.tcas.DocumentAnnotation:language", ts, true));
        final Feature cjkSwitchFeat = initFeature("uima.tcas.DocumentAnnotation:useNgramForCJK", ts, false);
        if (cjkSwitchFeat != null) {
            this.useNgramForCJKFeatureCode = getFeatCode(cjkSwitchFeat);
        } else {
            this.useNgramForCJKFeatureCode = 0;
        }

        this.sentence = new RangeAnnotator(sentenceTypeCode, sentenceNumFeatCode);
        this.paragraph = new RangeAnnotator(paragraphTypeCode, paragraphNumFeatCode);
    }

    /**
     * Re-reads the configuration: tears the annotator down via {@code destroy()}
     * and rebuilds its configuration-derived state via {@code init()}.
     *
     * @throws AnnotatorConfigurationException  if reading the configuration fails
     * @throws AnnotatorInitializationException declared by the annotator interface
     */
    public void reconfigure() throws AnnotatorConfigurationException, AnnotatorInitializationException {
        destroy();
        init();
    }

    /**
     * Tear-down hook. Intentionally empty: this method releases nothing; the
     * caches are replaced the next time {@code init()} runs.
     */
    public void destroy() {
    }

    /**
     * Translates configuration strings into a set of character categories.
     *
     * <p>Recognized names, compared case-insensitively, are "alphabet", "number"
     * and "other". Unrecognized or {@code null} entries are ignored.
     *
     * @param chars category names from the configuration, may be {@code null}
     * @return a new, possibly empty, set of the categories named in {@code chars}
     */
    private static EnumSet<NgramTokenizerConfig.CharCategory> getCategories(String[] chars) {
        final EnumSet<NgramTokenizerConfig.CharCategory> result = EnumSet.noneOf(NgramTokenizerConfig.CharCategory.class);
        if (chars == null) {
            return result;
        }
        for (final String name : chars) {
            if ("alphabet".equalsIgnoreCase(name)) {
                result.add(NgramTokenizerConfig.CharCategory.ALPHABET);
            } else if ("number".equalsIgnoreCase(name)) {
                result.add(NgramTokenizerConfig.CharCategory.NUMBER);
            } else if ("other".equalsIgnoreCase(name)) {
                result.add(NgramTokenizerConfig.CharCategory.OTHER);
            }
        }
        return result;
    }

    /**
     * Emits consecutive, non-overlapping range annotations (used for sentences and
     * paragraphs) into the CAS of the enclosing annotator.
     *
     * <p>Each call to {@link #add(int)} closes the range that started at the end of
     * the previous one. A type code of 0 means the annotation type is not available
     * and disables the emitter; a number feature code of 0 means the number feature
     * is not available and disables numbering.
     */
    private class RangeAnnotator {
        private boolean isRequired;
        private final int typeCode;
        private boolean isNumbersRequired = false;
        private final int numberFeatCode;
        private int startIndex;
        private int number;

        /**
         * @param typeCode       low-level code of the annotation type, 0 if unavailable
         * @param numberFeatCode low-level code of the number feature, 0 if unavailable
         */
        protected RangeAnnotator(int typeCode, int numberFeatCode) {
            this.typeCode = typeCode;
            this.numberFeatCode = numberFeatCode;
            // Never start out "required" for a type that does not exist.
            this.isRequired = typeCode != 0;
            this.init();
        }

        /** Resets the emitter for a new document: first range starts at 0, numbering at 1. */
        public void init() {
            this.startIndex = 0;
            this.number = 1;
        }

        /** Enables or disables emission; always disabled when the type is unavailable. */
        public void setRequired(boolean required) {
            this.isRequired = required && this.typeCode != 0;
        }

        /**
         * Enables or disables writing the range number. Numbering stays disabled when
         * either the type or the number feature itself is unavailable, so that no
         * value is ever written through feature code 0.
         */
        public void setNumberRequired(boolean required) {
            this.isNumbersRequired = required && this.typeCode != 0 && this.numberFeatCode != 0;
        }

        /**
         * Closes the current range at {@code location} and starts the next one there.
         * Does nothing when emission is disabled or the range would be empty.
         *
         * @param location end offset of the range to emit
         */
        public void add(int location) {
            if (!this.isRequired || this.startIndex >= location) {
                return;
            }
            int addr = NgramAnnotator.this.cas.ll_createFS(this.typeCode, false);
            NgramAnnotator.this.cas.ll_setIntValue(addr, NgramAnnotator.this.startFeatCode, this.startIndex, false);
            NgramAnnotator.this.cas.ll_setIntValue(addr, NgramAnnotator.this.endFeatCode, location, false);
            if (this.isNumbersRequired) {
                NgramAnnotator.this.cas.ll_setIntValue(addr, this.numberFeatCode, this.number, false);
            }
            NgramAnnotator.this.ir.ll_addFS(addr, false);
            this.startIndex = location;
            ++this.number;
            // With relative numbering, token numbers restart after every emitted range.
            if (NgramAnnotator.this.useRelativeTokenAndSentenceNumbers) {
                NgramAnnotator.this.tokenNumber = 1;
            }
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Receives tokenizer callbacks and turns them into CAS annotations through the
     * enclosing annotator. A single {@code TokenProperties} instance is reused and
     * reset for every callback.
     */
    private class Handler
    implements NgramTokenizer.Handler {
        private final TokenProperties prop = new TokenProperties();

        /**
         * Handles one tokenizer event.
         *
         * <ul>
         *   <li>SENTENCE / NEWLINE: a non-empty span is stored as a special token
         *       (as an n-gram token for NEWLINE), then the sentence is closed at
         *       {@code end}.</li>
         *   <li>PARAGRAPH: the paragraph is closed at {@code end}.</li>
         *   <li>Anything else: stored as a token whose n-gram / special / numeric
         *       flags are derived from the event type.</li>
         * </ul>
         */
        @Override
        public void addToken(NgramTokenizer.Type type, int start, int end, char[] covered, EnumSet<NgramTokenizer.Property> properties) {
            final TokenProperties flags = this.prop;
            flags.reset();

            final boolean isNewline = type == NgramTokenizer.Type.NEWLINE;
            if (type == NgramTokenizer.Type.SENTENCE || isNewline) {
                if (start != end) {
                    flags.setSpecial(true);
                    flags.setNgram(isNewline);
                    NgramAnnotator.this.tokenEntry(start, end, flags, covered);
                }
                NgramAnnotator.this.sentence.add(end);
                return;
            }
            if (type == NgramTokenizer.Type.PARAGRAPH) {
                NgramAnnotator.this.paragraph.add(end);
                return;
            }

            final boolean isMath = type == NgramTokenizer.Type.MATH_SYMBOL;
            flags.setNgram(type == NgramTokenizer.Type.NGRAM);
            flags.setSpecial(type == NgramTokenizer.Type.PUNCTUATION || isMath);
            flags.setNumeric(isMath);
            NgramAnnotator.this.tokenEntry(start, end, flags, covered);
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Operating mode of the annotator, derived from the "AnnotatorMode"
     * configuration parameter ("indexer", "runtime", anything else is UNKNOWN).
     */
    enum Mode {
        UNKNOWN, INDEXER, RUNTIME
    }
}

