/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.supa.tokenizers;

import com.ibm.dltj.DLTException;
import com.ibm.dltj.Dictionary;
import com.ibm.dltj.GlossCollection;
import com.ibm.dltj.gloss.LemmaGenerator;
import com.ibm.dltj.gloss.MidGloss;
import com.ibm.es.nuvo.tokenizer.TToken;
import com.ibm.supa.common.SUPAUtils;
import com.ibm.supa.common.text.CharSequenceIterator;
import com.ibm.supa.common.text.TextUtils;
import com.ibm.supa.tokenizers.ChainedDelegateTokenizer;
import com.ibm.supa.tokenizers.DictionaryCache;
import com.ibm.supa.tokenizers.TTokenUtils;
import com.ibm.supa.tokenizers.TokenizationParams;
import java.text.CharacterIterator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Chained tokenizer that looks a token up in per-language dictionaries and
 * appends the original token and/or its lemma(s) to the accumulator.
 *
 * <p>For each incoming token the lookup is attempted for the default, detected
 * and known language of the {@link TokenizationParams}, followed by every
 * configured additional language. Each distinct language is processed at most
 * once per token.
 *
 * <p>A {@link DictionaryCache} must be supplied through
 * {@link #setDictionaryCache(DictionaryCache)} before tokenizing; it is
 * dereferenced unconditionally for every language that is looked up.
 */
public class FastLemmatizationTokenizer
extends ChainedDelegateTokenizer {
    /** Per-thread set of languages already processed for the token currently being handled. */
    private final ThreadLocal<Set<String>> testedLanguages = SUPAUtils.createThreadLocal(HashSet.class);
    /** Per-thread set of lemmas already emitted for the current token/language pair. */
    private final ThreadLocal<Set<String>> testedWords = SUPAUtils.createThreadLocal(HashSet.class);
    /** Extra languages to try after the ones carried by the params; may be {@code null}. */
    private Set<String> additionalLanguages;
    /** Source of the per-language dictionaries. */
    private DictionaryCache cache;

    /**
     * Sets the languages that are tried in addition to the default, detected
     * and known language of each tokenization request.
     *
     * @param additionalLanguages language identifiers; {@code null} is treated
     *                            as "no additional languages"
     */
    public void setAdditionalLanguages(Set<String> additionalLanguages) {
        this.additionalLanguages = additionalLanguages;
    }

    /**
     * Sets the cache the dictionaries are loaded from.
     *
     * @param cache dictionary cache used for every lookup
     */
    public void setDictionaryCache(DictionaryCache cache) {
        this.cache = cache;
    }

    /**
     * Tokenizes {@code token} once for each distinct, non-null language among
     * the default, detected, known and additional languages, in that order.
     *
     * @param params      supplies the default, detected and known language
     * @param token       token to look up
     * @param accumulator receives the produced tokens
     */
    @Override
    protected void tokenize(TokenizationParams params, TToken token, List<TToken> accumulator) {
        Set<String> languagesSeen = this.testedLanguages.get();
        // Start from a clean slate in case a previous call left entries behind.
        languagesSeen.clear();
        try {
            this.tokenize(params.defaultLanguage, (CharSequence)token, accumulator);
            this.tokenize(params.detectedLanguage, (CharSequence)token, accumulator);
            this.tokenize(params.knownLanguage, (CharSequence)token, accumulator);
            // The additional languages are optional: the setter may never have
            // been called, or may have been called with null.
            Set<String> extraLanguages = this.additionalLanguages;
            if (extraLanguages != null) {
                for (String lang : extraLanguages) {
                    this.tokenize(lang, (CharSequence)token, accumulator);
                }
            }
        }
        finally {
            // Do not keep language names in the thread-local between calls.
            languagesSeen.clear();
        }
    }

    /**
     * Looks {@code tokenText} up in the dictionary for {@code lang} and appends
     * the resulting tokens to {@code accumulator}.
     *
     * <ul>
     *   <li>Nothing happens when {@code lang} is {@code null} or was already
     *       processed for the current token.</li>
     *   <li>When no dictionary is available or the lookup returns
     *       {@code null}, the text is added as {@code ORIGINAL}.</li>
     *   <li>Otherwise the first {@link MidGloss} decides how the original text
     *       is emitted: as {@code BOTH} when its lemma equals the text under
     *       {@code TextUtils.LEX_CASE_INSENSITIVE_ORDER}, as {@code ORIGINAL}
     *       followed by a {@code LEMMA} token when it differs, or as
     *       {@code ORIGINAL} alone when that gloss has no lemma generator.
     *       Every further distinct lemma is added as {@code LEMMA}.</li>
     * </ul>
     *
     * @param lang        language whose dictionary is consulted; may be {@code null}
     * @param tokenText   text of the token being processed
     * @param accumulator receives the produced tokens
     * @throws RuntimeException wrapping any {@link DLTException} raised by the
     *                          dictionary layer
     */
    private void tokenize(String lang, CharSequence tokenText, List<TToken> accumulator) {
        // Set.add returns false for a language that was already handled.
        if (lang == null || !this.testedLanguages.get().add(lang)) {
            return;
        }
        Set<String> wordsTested = this.testedWords.get();
        try {
            wordsTested.clear();
            Dictionary dict = this.cache.load(lang);
            GlossCollection gc = null;
            if (dict != null) {
                gc = dict.lookupWord((CharacterIterator)new CharSequenceIterator(tokenText), tokenText.length());
            }
            if (gc == null) {
                // No dictionary or no entry: pass the text through unchanged.
                accumulator.add(TTokenUtils.newToken(tokenText, TToken.Type.ORIGINAL));
                return;
            }
            // NOTE(review): if the collection contains no MidGloss entries,
            // nothing is emitted for this language - confirm that is intended.
            boolean first = true;
            for (Object oGloss : gc) {
                if (!(oGloss instanceof MidGloss)) {
                    continue;
                }
                MidGloss midGloss = (MidGloss)oGloss;
                LemmaGenerator lemmaGen = midGloss.getLemmaGloss();
                if (lemmaGen != null) {
                    String lemma = lemmaGen.getLemma((CharacterIterator)new CharSequenceIterator(tokenText), 0, tokenText.length());
                    // Skip lemmas already emitted for this token. This cannot
                    // trigger while 'first' is still true because the set is
                    // empty until the first lemma has been added.
                    if (!wordsTested.add(lemma)) {
                        continue;
                    }
                    if (first) {
                        if (TextUtils.LEX_CASE_INSENSITIVE_ORDER.compare(lemma, tokenText) == 0) {
                            // Lemma and surface form coincide: one token stands for both.
                            accumulator.add(TTokenUtils.newToken(tokenText, TToken.Type.BOTH));
                        } else {
                            accumulator.add(TTokenUtils.newToken(tokenText, TToken.Type.ORIGINAL));
                            accumulator.add(TTokenUtils.newToken((CharSequence)lemma, TToken.Type.LEMMA));
                        }
                    } else {
                        accumulator.add(TTokenUtils.newToken((CharSequence)lemma, TToken.Type.LEMMA));
                    }
                } else if (first) {
                    // First gloss has no lemma generator: still emit the original text once.
                    accumulator.add(TTokenUtils.newToken(tokenText, TToken.Type.ORIGINAL));
                }
                first = false;
            }
        }
        catch (DLTException e) {
            throw new RuntimeException(e);
        }
        finally {
            // Release the lemma strings held by the thread-local set.
            wordsTested.clear();
        }
    }
}

