/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer.uima;

import com.ibm.es.nuvo.common.ExtendedException;
import com.ibm.es.nuvo.common.Message;
import com.ibm.es.nuvo.configuration.GlobalConfiguration;
import com.ibm.es.nuvo.parser.XMLTagInfo;
import com.ibm.es.nuvo.tokenizer.AbstractTokenizer;
import com.ibm.es.nuvo.tokenizer.ResultHandler;
import com.ibm.es.nuvo.tokenizer.TToken;
import com.ibm.es.nuvo.tokenizer.TokenizerException;
import com.ibm.es.nuvo.tokenizer.TokenizerProcessInput;
import com.ibm.es.nuvo.tokenizer.uima.TokenIterator;
import com.ibm.es.nuvo.tokenizer.uima.XMLLowLevelCASSupport;
import com.ibm.es.nuvo.util.FileUtils;
import com.ibm.supa.config.AnalysisScopeConfig;
import com.ibm.supa.config.ConfigurationLoader;
import com.ibm.supa.uima.DelegateTCAS;
import com.ibm.supa.uima.IndexedTAESpec;
import com.ibm.supa.uima.UIMAConfig;
import com.ibm.supa.uima.index.FilterParser;
import com.ibm.supa.uima.index.Mapping;
import com.ibm.uima.UIMAFramework;
import com.ibm.uima.analysis_engine.AnalysisEngineProcessException;
import com.ibm.uima.analysis_engine.TaeDescription;
import com.ibm.uima.analysis_engine.TextAnalysisEngine;
import com.ibm.uima.analysis_engine.metadata.AnalysisEngineMetaData;
import com.ibm.uima.cas.CAS;
import com.ibm.uima.cas.CASRuntimeException;
import com.ibm.uima.cas.FSMatchConstraint;
import com.ibm.uima.cas.FeatureStructure;
import com.ibm.uima.cas.Type;
import com.ibm.uima.cas.TypeSystem;
import com.ibm.uima.cas.impl.LowLevelCAS;
import com.ibm.uima.cas.impl.LowLevelIndexRepository;
import com.ibm.uima.cas.impl.LowLevelIterator;
import com.ibm.uima.cas.text.AnnotationFS;
import com.ibm.uima.cas.text.TCAS;
import com.ibm.uima.cas.text.TCASRuntimeException;
import com.ibm.uima.resource.ResourceInitializationException;
import com.ibm.uima.resource.ResourceManager;
import com.ibm.uima.resource.ResourceSpecifier;
import com.ibm.uima.resource.metadata.FsIndexCollection;
import com.ibm.uima.resource.metadata.FsIndexDescription;
import com.ibm.uima.resource.metadata.TypePriorities;
import com.ibm.uima.resource.metadata.TypeSystemDescription;
import com.ibm.uima.search.Filter;
import com.ibm.uima.search.IndexBuildItem;
import com.ibm.uima.search.IndexBuildSpecification;
import com.ibm.uima.search.IndexRule;
import com.ibm.uima.search.IndexingException;
import com.ibm.uima.search.Style;
import com.ibm.uima.util.CasCreationUtils;
import com.ibm.uima.util.InvalidXMLException;
import com.ibm.uima.util.Level;
import com.ibm.uima.util.XMLInputSource;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public abstract class AbstractCASTokenizer
extends AbstractTokenizer {
    /** Copyright notice carried as a constant in the class; not read anywhere in this file. */
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    /** Property key: file path of the primary TAE descriptor (read in configure, loaded in initUimaTae). */
    public static final String PARAM_DESC = "whitney.tokenizer.uima.descriptor";
    /** Property key: UIMA data path; when present it is set on the resource manager. */
    public static final String PARAM_DATA = "whitney.tokenizer.uima.data";
    /** Property key for tokenizer options; not read anywhere in this file. */
    public static final String PARAM_OPTS = "whitney.tokenizer.uima.options";
    /** Property key: value copied into the "cas_initial_heap_size" performance tuning property. */
    public static final String PARAM_INIT_SIZE = "whitney.tokenizer.uima.cas.initsize";
    /** Property key: "true" (case-insensitive) turns on {@link #useNgramForCJK}. */
    public static final String PARAM_FORCE_NGRAM = "whitney.tokenizer.force_ngram";
    /** Property key: "true" (case-insensitive) turns on {@link #indexAllLemmas}. */
    public static final String PARAM_INDEX_ALL_LEMMAS = "whitney.tokenizer.all_lemmas";
    /** Name of the boolean config parameter read from the first engine into {@link #usePuncForMathSymbol}. */
    private static final String UIMA_PARAM_USE_PUNC_FOR_MATH = "CreatePunctuationInsteadOfMathSymbol";
    /** Name of the boolean config parameter read from the first engine into {@link #usePuncForCurrencySymbol}. */
    private static final String UIMA_PARAM_USE_PUNC_FOR_CURRENCY = "CreatePunctuationInsteadOfCurrencySymbol";
    /** Property key: collection id used for the ConfigurationLoader lookups and the temp jar directory. */
    public static final String PARAM_COLLECTION_ID = "whitney.tokenizer.collection.id";
    /** Collection id taken from the configuration properties; may be null if the property is absent. */
    protected String collectionId;
    /** End-of-plaintext offset of the current document, copied from the process input in preloadCAS. */
    protected int endOfPlaintextPosition;
    /** Analysis engines; index 0 is built from the primary descriptor, the rest from the indexed TAE specs. */
    private TextAnalysisEngine[] taes;
    /** Index build items compiled from the TAE specs' index build specifications (set by compile). */
    protected CompiledIndexBuildItem[] compiledItems;
    /** The single CAS reused for every document; reset at the end of each process call. */
    private TCAS tcas;
    /** Low-level type code of "uima.tcas.DocumentAnnotation", resolved in initUimaTae. */
    private int documentTypeCode;
    /** Low-level feature code of DocumentAnnotation:useNgramForCJK. */
    private int useNgramForCJKFeatureCode;
    /** Low-level feature code of DocumentAnnotation:indexAllLemmas. */
    private int indexAllLemmasFeatureCode;
    /** Low-level feature code of DocumentAnnotation:fallbackLanguage. */
    private int fallbackLanguageFeatureCode;
    /** Low-level CAS helper used to create XML tag/attribute entries; rebuilt whenever the CAS is rebuilt. */
    protected XMLLowLevelCASSupport xll;
    /** Low-level feature code of DocumentAnnotation:xmlAttrValueTextBegin. */
    private int xmlAttrValueTextBeginFeatureCode;
    /** Configured via {@link #PARAM_FORCE_NGRAM}; written to the document annotation as 2 (on) or 1 (off). */
    private boolean useNgramForCJK;
    /** Configured via {@link #PARAM_INDEX_ALL_LEMMAS}; selects the all-lemmas branch of generateRegularTokens. */
    private boolean indexAllLemmas;
    /** Value of the first engine's math-symbol parameter; defaults to true when absent or not a Boolean. */
    protected boolean usePuncForMathSymbol;
    /** Value of the first engine's currency-symbol parameter; defaults to true when absent or not a Boolean. */
    protected boolean usePuncForCurrencySymbol;
    /** Offset added to attribute value positions of the current document, copied from the process input. */
    protected int startOfAttributeValues;
    /** Per-document flag copied from the process input in preloadCAS. */
    protected boolean indexSentenceAnnotations;
    /** Scratch list handed to TokenIterator.getLemmas on every call to generateRegularTokens. */
    private List<CharSequence> lemmaList = new ArrayList<CharSequence>();

    /**
     * Subclass hook invoked by {@code process} once every analysis engine has run
     * over the CAS and before the CAS is reset.
     *
     * @param var1 the CAS that was just analysed
     * @param var2 the result handler passed to {@code process}; presumably the sink for
     *             the extracted tokens/annotations -- the exact contract is defined by subclasses
     */
    protected abstract void extractAnnotations(TCAS var1, ResultHandler var2);

    /**
     * Prepares the CAS for one document before the analysis engines run.
     *
     * <p>Sets the document text, caches the per-document offsets and flags from the
     * input on this instance, writes the n-gram / all-lemmas / fallback-language
     * features onto the document annotation, sets the document language when it is
     * known, and, when XML text buffers are present, indexes one entry per XML tag
     * and per tag attribute.
     *
     * @param cas the CAS to populate
     * @param tpi the document and its tokenization parameters
     */
    protected void preloadCAS(TCAS cas, TokenizerProcessInput tpi) {
        cas.setDocumentText(tpi.getText().toString());
        this.startOfAttributeValues = tpi.getStartOfAttributeValues();
        this.endOfPlaintextPosition = tpi.getEndOfPlaintextPosition();
        this.indexSentenceAnnotations = tpi.indexSentenceAnnotations();

        // Address of the document annotation: first entry of the annotation index for that type.
        LowLevelCAS lowLevel = cas.getLowLevelCAS();
        int docAnnotation = lowLevel.ll_getIndexRepository()
                .ll_getIndex("AnnotationIndex", this.documentTypeCode)
                .ll_iterator()
                .ll_get();

        int ngramFlag = this.useNgramForCJK ? 2 : 1;
        int allLemmasFlag = this.indexAllLemmas ? 1 : 0;
        lowLevel.ll_setIntValue(docAnnotation, this.useNgramForCJKFeatureCode, ngramFlag);
        lowLevel.ll_setIntValue(docAnnotation, this.indexAllLemmasFeatureCode, allLemmasFlag);

        if (tpi.getDefaultLanguage() != null) {
            lowLevel.ll_setStringValue(docAnnotation, this.fallbackLanguageFeatureCode, tpi.getDefaultLanguage());
        }
        if (tpi.getKnownLanguage() != null) {
            cas.setDocumentLanguage(tpi.getKnownLanguage());
        }

        this.xll.setXMLTextBuffers(tpi.getXMLTextBuffers());
        if (tpi.getXMLTextBuffers() == null) {
            // Not an XML document: nothing further to index.
            return;
        }

        lowLevel.ll_setIntValue(docAnnotation, this.xmlAttrValueTextBeginFeatureCode, this.startOfAttributeValues);
        for (Iterator<XMLTagInfo> tags = tpi.getXMLTagInfo(); tags != null && tags.hasNext(); ) {
            this.indexXmlTag(tags.next());
        }
    }

    /**
     * Adds one XML tag, followed by each of its attributes, to the CAS index.
     * Attributes inherit the serial number, last-descendant and depth of their tag;
     * their offsets are shifted by {@link #startOfAttributeValues}.
     */
    private void indexXmlTag(XMLTagInfo tagInfo) {
        int begin = tagInfo.getTagTextStartPos();
        int end = begin + tagInfo.getTagTextLength();
        int serial = tagInfo.getSerialNumber();
        int lastDescendant = tagInfo.getLastDescendant();
        int depth = tagInfo.getDepth();

        XMLLowLevelCASSupport.Tag tag = new XMLLowLevelCASSupport.Tag(this.xll);
        tag.setTagAttrBegin(begin);
        tag.setTagAttrEnd(end);
        tag.setTagAttrName(tagInfo.getTagName());
        tag.setTagAttrSerial(serial);
        tag.setTagAttrLastDescendant(lastDescendant);
        tag.setTagAttrDepth(depth);
        tag.addToIndex();

        for (Iterator<XMLTagInfo.Attribute> attrs = tagInfo.getAttributes(); attrs != null && attrs.hasNext(); ) {
            XMLTagInfo.Attribute source = attrs.next();
            XMLLowLevelCASSupport.Attr target = new XMLLowLevelCASSupport.Attr(this.xll);
            int valueBegin = source.getValueStartPos() + this.startOfAttributeValues;
            int valueEnd = valueBegin + source.getValueLength();
            target.setTagAttrBegin(valueBegin);
            target.setTagAttrEnd(valueEnd);
            target.setTagAttrName(source.getName());
            target.setTagAttrSerial(serial);
            target.setTagAttrLastDescendant(lastDescendant);
            target.setTagAttrDepth(depth);
            target.addToIndex();
        }
    }

    /**
     * Tokenizes one document: preloads the CAS, runs the primary engine on the full
     * text, runs any additional engines on a delegate CAS restricted to the plaintext
     * part, hands the CAS to {@link #extractAnnotations}, and finally resets the CAS.
     *
     * @param handler receiver passed through to {@code extractAnnotations}
     * @param tpi     the document and its tokenization parameters
     * @throws TokenizerException (message P6001E.CANNOT_PROCESS_CAS) wrapping any failure,
     *         including the case where the tokenizer has not been successfully configured
     */
    @Override
    public void process(ResultHandler handler, TokenizerProcessInput tpi) throws TokenizerException {
        try {
            if (this.tcas == null || this.taes == null) {
                // configure() was never called or failed; report it through the declared
                // exception type instead of letting a NullPointerException escape.
                throw new IllegalStateException("Tokenizer has not been configured");
            }
            try {
                this.preloadCAS(this.tcas, tpi);
            }
            catch (Exception e) {
                // Preloading is best effort: a failure here is reported but analysis continues
                // with whatever was loaded. NOTE(review): confirm this is intentional.
                e.printStackTrace();
            }
            this.taes[0].process((CAS)this.tcas);
            if (this.taes.length > 1) {
                // Additional engines only see the plaintext part of the document.
                String oldCasText = this.tcas.getDocumentText();
                DelegateTCAS dTcas = new DelegateTCAS(this.tcas, tpi.getTokenizationParams());
                dTcas.setDocumentText(oldCasText.substring(0, tpi.getEndOfPlaintextPosition()));
                for (int i = 1; i < this.taes.length; ++i) {
                    this.taes[i].process((CAS)dTcas);
                }
                dTcas.setDocumentText(oldCasText);
            }
            this.extractAnnotations(this.tcas, handler);
        }
        catch (Throwable e) {
            // Every failure kind maps to the same message, so a single handler suffices.
            Message msg = new Message("P6001E.CANNOT_PROCESS_CAS");
            throw new TokenizerException(msg, e);
        }
        finally {
            // Guarded so that a missing CAS cannot replace the TokenizerException above
            // with a NullPointerException.
            if (this.tcas != null) {
                this.tcas.reset();
            }
        }
    }

    /**
     * (Re)configures the tokenizer from the given properties: records the collection id
     * and the n-gram / all-lemmas switches, destroys any engines from a previous
     * configuration, and builds fresh engines and a fresh CAS.
     *
     * @param params configuration properties; must not be null
     * @throws NullPointerException if {@code params} is null
     * @throws TokenizerException   if the engines or the CAS cannot be created
     */
    @Override
    public void configure(Properties params) throws TokenizerException {
        if (params == null) {
            throw new NullPointerException();
        }
        this.collectionId = params.getProperty(PARAM_COLLECTION_ID);
        // parseBoolean is true only for a non-null, case-insensitive "true".
        this.useNgramForCJK = Boolean.parseBoolean(params.getProperty(PARAM_FORCE_NGRAM));
        this.indexAllLemmas = Boolean.parseBoolean(params.getProperty(PARAM_INDEX_ALL_LEMMAS));

        String descriptorPath = params.getProperty(PARAM_DESC);
        String dataPath = params.getProperty(PARAM_DATA);
        String casInitSize = params.getProperty(PARAM_INIT_SIZE);

        this.destroyTAE();
        this.initUimaTae(descriptorPath, dataPath, casInitSize);
    }

    /**
     * Parses a TAE descriptor file.
     *
     * @param taeDescriptor the descriptor file
     * @return the parsed description
     * @throws IllegalArgumentException if the path does not exist or is a directory
     * @throws IOException              if the file cannot be opened
     * @throws InvalidXMLException      if the descriptor cannot be parsed
     */
    private TaeDescription loadDescription(File taeDescriptor) throws IOException, InvalidXMLException {
        if (!taeDescriptor.exists() || taeDescriptor.isDirectory()) {
            throw new IllegalArgumentException(taeDescriptor + " TAE descriptor file not found.");
        }
        XMLInputSource descriptorSource = new XMLInputSource(taeDescriptor);
        return UIMAFramework.getXMLParser().parseTaeDescription(descriptorSource);
    }

    /**
     * Builds all analysis engines and the shared CAS.
     *
     * <p>Steps: (1) copy the collection's extension jars, if any, into a temp directory
     * and install them as the resource manager's extension class path; (2) load the
     * descriptors of the collection's indexed TAE specs after the primary descriptor;
     * (3) produce one engine per descriptor; (4) create a CAS from the merged type
     * systems, type priorities and index definitions of all engines; (5) compile the
     * index build specifications against the CAS type system.
     *
     * @param desc      the primary TAE description; its engine becomes {@code taes[0]}
     * @param manager   resource manager shared by all engines
     * @param perfProps performance tuning properties for the engines and the CAS
     * @throws IOException if a jar cannot be copied (the underlying failure is kept as the
     *         cause) or a descriptor cannot be read
     */
    private void loadEnginesAndTCas(TaeDescription desc, ResourceManager manager, Properties perfProps) throws InvalidXMLException, IOException, ResourceInitializationException, IndexingException, CASRuntimeException {
        LinkedList<TaeDescription> taeDescs = new LinkedList<TaeDescription>();
        taeDescs.add(desc);
        AnalysisScopeConfig config = ConfigurationLoader.getAnalysisScopeConfig(this.collectionId);
        String supaDirPath = ConfigurationLoader.getSupaDirectoryPath(this.collectionId);
        UIMAConfig uimaConfig = config.getUIMAConfig();
        List<String> jarPaths = uimaConfig.getJarPaths();
        if (jarPaths != null && !jarPaths.isEmpty()) {
            String tempDir = GlobalConfiguration.getConfiguration().getTempDirectory();
            // NOTE(review): no separator is inserted after tempDir; presumably the configured
            // value already ends with one -- confirm against GlobalConfiguration.
            String baseTempCollectionDir = tempDir + "supa_temp_jars" + File.separator + "collections";
            String baseCollectionDir = baseTempCollectionDir + File.separator + this.collectionId + File.separator + "supa";
            URL[] urls = new URL[jarPaths.size()];
            int i = 0;
            for (String jarPath : jarPaths) {
                File tempFile = new File(baseCollectionDir + File.separator + jarPath);
                tempFile.getParentFile().mkdirs();
                File srcFile = new File(supaDirPath, jarPath);
                // An already staged copy is reused as is.
                if (!tempFile.exists()) {
                    try {
                        FileUtils.copyFile(srcFile, tempFile);
                    }
                    catch (ExtendedException e) {
                        // initCause keeps the root failure without needing the Java 6-only
                        // IOException(String, Throwable) constructor.
                        IOException failure = new IOException("Can't copy temp supa jar: " + srcFile + " -> " + tempFile);
                        failure.initCause((Throwable)e);
                        throw failure;
                    }
                    tempFile.deleteOnExit();
                }
                // toURI().toURL() escapes characters that are illegal in URLs (e.g. spaces),
                // which the deprecated File.toURL() does not.
                urls[i++] = tempFile.toURI().toURL();
            }
            manager.setExtensionClassPath((ClassLoader)new URLClassLoader(urls, this.getClass().getClassLoader()), "", true);
        }
        List<IndexedTAESpec> taeSpecs = uimaConfig.getIndexedTAESpecs();
        if (taeSpecs == null) {
            taeSpecs = Collections.emptyList();
        }
        for (IndexedTAESpec taeSpec : taeSpecs) {
            taeDescs.add(this.loadDescription(new File(supaDirPath, taeSpec.getTAEDescriptorPath())));
        }
        LinkedList<TypeSystemDescription> systemDescs = new LinkedList<TypeSystemDescription>();
        LinkedList<TypePriorities> priorities = new LinkedList<TypePriorities>();
        LinkedList<FsIndexCollection> fsIndexes = new LinkedList<FsIndexCollection>();
        int index = 0;
        this.taes = new TextAnalysisEngine[taeDescs.size()];
        HashMap<String, Properties> props = new HashMap<String, Properties>();
        props.put("PERFORMANCE_TUNING_SETTINGS", perfProps);
        for (TaeDescription taeDesc : taeDescs) {
            this.taes[index] = UIMAFramework.produceTAE((ResourceSpecifier)taeDesc, (ResourceManager)manager, props);
            AnalysisEngineMetaData metaData = this.taes[index].getAnalysisEngineMetaData();
            systemDescs.add(metaData.getTypeSystem());
            priorities.add(metaData.getTypePriorities());
            fsIndexes.add(metaData.getFsIndexCollection());
            ++index;
        }
        // One CAS serves every engine, so it is created from the union of their metadata.
        this.tcas = CasCreationUtils.createTCas((TypeSystemDescription)CasCreationUtils.mergeTypeSystems(systemDescs, (ResourceManager)manager), (TypePriorities)CasCreationUtils.mergeTypePriorities(priorities, (ResourceManager)manager), (FsIndexDescription[])CasCreationUtils.mergeFsIndexes(fsIndexes, (ResourceManager)manager).getFsIndexes(), (Properties)perfProps, (ResourceManager)manager);
        this.compile(supaDirPath, taeSpecs, this.tcas.getTypeSystem());
    }

    /**
     * Compiles the index build specifications of the given TAE specs into
     * {@link #compiledItems}.
     *
     * <p>Specs without an index build specification path are skipped. For every build
     * item the type is resolved (or, for names ending in ".*", a name prefix is
     * recorded), the filter is parsed, and -- when the spec defines attribute
     * mappings -- the mappings registered under the item's name are attached if the
     * item's rule contains the "Annotation" style.
     *
     * @param baseDirPath directory against which specification paths are resolved
     * @param taeSpecs    specs to compile; must not be null
     * @param aTypeSystem type system used to resolve item type names
     */
    private void compile(String baseDirPath, List<IndexedTAESpec> taeSpecs, TypeSystem aTypeSystem) throws IndexingException, InvalidXMLException, IOException {
        ArrayList<CompiledIndexBuildItem> result = new ArrayList<CompiledIndexBuildItem>();
        for (IndexedTAESpec taeSpec : taeSpecs) {
            String specPath = taeSpec.getIndexBuildSpecificationPath();
            if (specPath == null || specPath.length() == 0) continue;
            File indxSpecFile = new File(baseDirPath, specPath);
            IndexBuildSpecification spec = (IndexBuildSpecification)UIMAFramework.getXMLParser().parse(new XMLInputSource(indxSpecFile));
            IndexBuildItem[] items = spec.getIndexBuildItems();
            for (IndexBuildItem item : items) {
                CompiledIndexBuildItem compiledItem = new CompiledIndexBuildItem();
                String name = item.getName();
                compiledItem.name = name;
                if (name.endsWith(".*")) {
                    // Wildcard item: matched by name prefix instead of by a concrete type.
                    compiledItem.type = null;
                    compiledItem.typePrefix = name.substring(0, name.length() - 2);
                } else {
                    compiledItem.type = aTypeSystem.getType(name);
                    if (compiledItem.type == null) {
                        UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, "Unknown type \"" + name + "\" in IndexBuildSpecification.");
                    }
                }
                Filter filter = item.getFilter();
                compiledItem.filter = filter == null ? null : FilterParser.parse(filter);
                compiledItem.rule = item.getIndexRule();
                result.add(compiledItem);
                if (taeSpec.getAttributeMappings() == null) continue;
                Style[] styles = item.getIndexRule().getStyles();
                for (int j = 0; j < styles.length; ++j) {
                    String styleName = styles[j].getName();
                    // Compare by value: reference comparison is only correct when every
                    // style name happens to be interned.
                    boolean annotationStyle = "Annotation".equals(styleName);
                    if (!"Term".equals(styleName) && !"Breaking".equals(styleName) && !annotationStyle) {
                        UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, "Unsupported style \"" + styleName + "\" in IndexBuildSpecification.");
                    }
                    if (!annotationStyle) continue;
                    List<Mapping> attrMappings = taeSpec.getAttributeMappings().get(name);
                    if (attrMappings == null) continue;
                    compiledItem.attributeMappings = new HashMap<String, String>();
                    for (Mapping mapping : attrMappings) {
                        compiledItem.attributeMappings.put(mapping.getFeature(), mapping.getIndexName());
                    }
                }
            }
        }
        this.compiledItems = result.toArray(new CompiledIndexBuildItem[result.size()]);
    }

    /**
     * Creates the resource manager, engines and CAS, then caches the low-level type and
     * feature codes of the document annotation and the punctuation switches of the
     * first engine.
     *
     * @param aTaeDescriptorName path of the primary TAE descriptor
     * @param uimaDataPath       UIMA data path, or null to leave the default
     * @param initSize           initial CAS heap size, or null to leave the default
     * @throws TokenizerException (message P6000E.CANNOT_CREATE_CAS_TOKENIZER) on I/O, XML,
     *         resource initialization or indexing failures
     */
    private void initUimaTae(String aTaeDescriptorName, String uimaDataPath, String initSize) throws TokenizerException {
        try {
            ResourceManager resourceManager = UIMAFramework.newDefaultResourceManager();
            if (uimaDataPath != null) {
                resourceManager.setDataPath(uimaDataPath);
            }
            Properties tuning = UIMAFramework.getDefaultPerformanceTuningProperties();
            if (initSize != null) {
                tuning.setProperty("cas_initial_heap_size", initSize);
            }
            TaeDescription primary = this.loadDescription(new File(aTaeDescriptorName));
            this.loadEnginesAndTCas(primary, resourceManager, tuning);

            this.xll = new XMLLowLevelCASSupport(this.tcas);
            LowLevelCAS lowLevel = this.tcas.getLowLevelCAS();
            this.documentTypeCode = lowLevel.ll_getTypeSystem().ll_getCodeForTypeName("uima.tcas.DocumentAnnotation");
            this.useNgramForCJKFeatureCode = lowLevel.ll_getTypeSystem().ll_getCodeForFeatureName("uima.tcas.DocumentAnnotation:useNgramForCJK");
            this.indexAllLemmasFeatureCode = lowLevel.ll_getTypeSystem().ll_getCodeForFeatureName("uima.tcas.DocumentAnnotation:indexAllLemmas");
            this.fallbackLanguageFeatureCode = lowLevel.ll_getTypeSystem().ll_getCodeForFeatureName("uima.tcas.DocumentAnnotation:fallbackLanguage");
            this.xmlAttrValueTextBeginFeatureCode = lowLevel.ll_getTypeSystem().ll_getCodeForFeatureName("uima.tcas.DocumentAnnotation:xmlAttrValueTextBegin");

            TextAnalysisEngine primaryEngine = this.taes[0];
            this.usePuncForMathSymbol = AbstractCASTokenizer.getConfigParameterValue(primaryEngine, UIMA_PARAM_USE_PUNC_FOR_MATH, true);
            this.usePuncForCurrencySymbol = AbstractCASTokenizer.getConfigParameterValue(primaryEngine, UIMA_PARAM_USE_PUNC_FOR_CURRENCY, true);
        }
        catch (IOException e) {
            throw AbstractCASTokenizer.cannotCreateTokenizer(aTaeDescriptorName, uimaDataPath, (Throwable)e);
        }
        catch (InvalidXMLException e) {
            throw AbstractCASTokenizer.cannotCreateTokenizer(aTaeDescriptorName, uimaDataPath, (Throwable)e);
        }
        catch (ResourceInitializationException e) {
            throw AbstractCASTokenizer.cannotCreateTokenizer(aTaeDescriptorName, uimaDataPath, (Throwable)e);
        }
        catch (IndexingException e) {
            throw AbstractCASTokenizer.cannotCreateTokenizer(aTaeDescriptorName, uimaDataPath, (Throwable)e);
        }
    }

    /**
     * Builds the exception reported for any checked failure of {@code initUimaTae};
     * the descriptor name and data path become the message arguments, in that order.
     */
    private static TokenizerException cannotCreateTokenizer(String descriptorName, String dataPath, Throwable cause) {
        Message msg = new Message("P6000E.CANNOT_CREATE_CAS_TOKENIZER");
        msg.addArgument(descriptorName);
        msg.addArgument(dataPath);
        return new TokenizerException(msg, cause);
    }

    /**
     * Drops the CAS and its XML helper and destroys every analysis engine created by a
     * previous configuration. Safe to call when nothing has been configured yet.
     */
    private void destroyTAE() {
        this.tcas = null;
        this.xll = null;
        if (this.taes != null) {
            for (TextAnalysisEngine tae : this.taes) {
                // The array is allocated before the engines are produced, so a configuration
                // that failed part-way leaves null slots behind; skip them.
                if (tae != null) {
                    tae.destroy();
                }
            }
            this.taes = null;
        }
    }

    /**
     * Tells whether a lemma is the same as the original token text when compared
     * character by character with {@link Character#toLowerCase(char)} applied to both.
     *
     * @param original the token text as it appears in the document; must not be null
     * @param lemma    the lemma to compare; must not be null
     * @return true if both have the same length and all characters match ignoring case;
     *         two empty sequences are considered equal
     */
    protected boolean lemmaEqualsOriginal(CharSequence original, CharSequence lemma) {
        int length = original.length();
        if (length != lemma.length()) {
            return false;
        }
        if (length == 0) {
            // Both empty: equal. Without this guard charAt(0) below would throw.
            return true;
        }
        // First character is checked up front, the remainder from the end backwards.
        if (Character.toLowerCase(original.charAt(0)) != Character.toLowerCase(lemma.charAt(0))) {
            return false;
        }
        for (int i = length - 1; i > 0; --i) {
            if (Character.toLowerCase(original.charAt(i)) != Character.toLowerCase(lemma.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replaces the contents of {@code tokens} with the tokens for the iterator's
     * current position.
     *
     * <p>The first token always carries the original text: typed BOTH when a lemma
     * equals the original (see {@link #lemmaEqualsOriginal}), ORIGINAL otherwise. It is
     * followed by one LEMMA token per remaining lemma -- all lemmas that differ from
     * the original when all-lemma indexing is on, otherwise at most the single lemma.
     *
     * @param tokens    output list; cleared first
     * @param it        iterator positioned on the token to convert
     * @param isRuntime not used by this implementation
     */
    protected void generateRegularTokens(List<TToken> tokens, TokenIterator it, boolean isRuntime) {
        tokens.clear();
        CharSequence surface = it.getOriginal();

        if (!this.indexAllLemmas) {
            CharSequence single = it.getLemma();
            if (single != null && this.lemmaEqualsOriginal(surface, single)) {
                tokens.add(TToken.newToken(surface, TToken.Type.BOTH));
                return;
            }
            tokens.add(TToken.newToken(surface, TToken.Type.ORIGINAL));
            if (single != null) {
                tokens.add(TToken.newToken(single, TToken.Type.LEMMA));
            }
            return;
        }

        List<CharSequence> candidates = it.getLemmas(this.lemmaList);
        boolean coversOriginal = false;
        if (candidates != null) {
            // Walk backwards so removing by index does not shift the entries still to visit.
            int idx = candidates.size();
            while (--idx >= 0) {
                if (this.lemmaEqualsOriginal(surface, candidates.get(idx))) {
                    candidates.remove(idx);
                    coversOriginal = true;
                }
            }
        }
        if (coversOriginal) {
            tokens.add(TToken.newToken(surface, TToken.Type.BOTH));
        } else {
            tokens.add(TToken.newToken(surface, TToken.Type.ORIGINAL));
        }
        if (candidates != null) {
            for (CharSequence remaining : candidates) {
                tokens.add(TToken.newToken(remaining, TToken.Type.LEMMA));
            }
        }
    }

    /**
     * Reads a boolean configuration parameter from an engine.
     *
     * @param tae          engine to query
     * @param param        parameter name
     * @param defaultValue value returned when the parameter is unset or not a Boolean
     * @return the parameter value, or {@code defaultValue}
     */
    protected static boolean getConfigParameterValue(TextAnalysisEngine tae, String param, boolean defaultValue) {
        Object raw = tae.getConfigParameterValue(param);
        // instanceof is false for null, so no separate null test is needed.
        if (!(raw instanceof Boolean)) {
            return defaultValue;
        }
        return ((Boolean)raw).booleanValue();
    }

    /**
     * One index build item in resolved form: either a concrete type or a type-name
     * prefix, plus an optional filter, the index rule, and optional feature-to-index-name
     * mappings.
     */
    static class CompiledIndexBuildItem {
        /** Resolved type; null for wildcard items and for names unknown to the type system. */
        Type type;
        /** Name prefix for wildcard items (the item name without its trailing ".*"); null otherwise. */
        String typePrefix;
        /** Item name exactly as given in the index build specification. */
        String name;
        /** Index rule of the item. */
        IndexRule rule;
        /** Parsed filter, or null when the item has none. */
        FSMatchConstraint filter;
        /** Feature name to index name; null when no mappings apply to this item. */
        Map<String, String> attributeMappings;

        CompiledIndexBuildItem() {
        }

        /**
         * Returns true when the annotation's type matches this item and the filter,
         * if there is one, accepts the annotation.
         */
        boolean matches(AnnotationFS aAnnotation) {
            if (!this.matchesType(aAnnotation.getType())) {
                return false;
            }
            if (this.filter == null) {
                return true;
            }
            return this.filter.match((FeatureStructure)aAnnotation);
        }

        /**
         * Type test only: identity with {@link #type} when a concrete type is set,
         * otherwise a name-prefix test against {@link #typePrefix}; false when neither is set.
         */
        boolean matchesType(Type annotationType) {
            if (this.type != null) {
                return annotationType == this.type;
            }
            // NOTE(review): plain startsWith also accepts names that merely share the prefix
            // characters (no namespace separator is required after it) -- confirm intended.
            return this.typePrefix != null && annotationType.getName().startsWith(this.typePrefix);
        }
    }
}

