/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.tokenizer.annotators;

import com.ibm.es.nuvo.tokenizer.annotators.AbstractTextAnnotator;
import com.ibm.icu.lang.UScript;
import com.ibm.uima.analysis_engine.ResultSpecification;
import com.ibm.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorContext;
import com.ibm.uima.analysis_engine.annotator.AnnotatorContextException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorInitializationException;
import com.ibm.uima.analysis_engine.annotator.AnnotatorProcessException;
import com.ibm.uima.cas.FeatureStructure;
import com.ibm.uima.cas.TypeSystem;
import com.ibm.uima.cas.impl.LowLevelCAS;
import com.ibm.uima.cas.text.AnnotationFS;
import com.ibm.uima.cas.text.TCAS;

public class QuickLanguageIDAnnotator
extends AbstractTextAnnotator {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    public static final String UNSPECIFIED_LANGUAGE = "x-unspecified";
    public static final String DEFAULT_CJK_LANGUAGE_PARM = "DefaultCJKLanguage";
    private String defaultCJKLanguage = "ja";
    private int languageFeatureCode;
    private int fallbackLanguageFeatureCode;

    public void process(TCAS cas, ResultSpecification resultSpec) throws AnnotatorProcessException {
        AnnotationFS docFS;
        int addr;
        String existingLanguage = cas.getDocumentLanguage();
        if (existingLanguage != null && existingLanguage.length() > 0 && !existingLanguage.equals(UNSPECIFIED_LANGUAGE)) {
            return;
        }
        LowLevelCAS llcas = cas.getLowLevelCAS();
        String fallbackLanguage = llcas.ll_getStringValue(addr = llcas.ll_getFSRef((FeatureStructure)(docFS = cas.getDocumentAnnotation())), this.fallbackLanguageFeatureCode);
        if (fallbackLanguage == null || fallbackLanguage.length() == 0) {
            fallbackLanguage = UNSPECIFIED_LANGUAGE;
        }
        String computedLang = this.identify(cas.getDocumentText(), fallbackLanguage);
        llcas.ll_setStringValue(addr, this.languageFeatureCode, computedLang);
    }

    private String identify(String text, String fallback) {
        boolean isHanScriptFound = false;
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (UScript.getScript((int)c)) {
                case 2: {
                    return "ar";
                }
                case 19: {
                    return "he";
                }
                case 20: 
                case 22: {
                    return "ja";
                }
                case 24: {
                    return "lo";
                }
                case 27: {
                    return "mn";
                }
                case 38: {
                    return "th";
                }
                case 41: {
                    return "yi";
                }
                case 18: {
                    return "ko";
                }
                case 17: {
                    isHanScriptFound = true;
                }
            }
        }
        if (isHanScriptFound) {
            if (fallback != null && fallback.length() >= 2) {
                String twoLetter = fallback.substring(0, 2).toLowerCase();
                if (!("ja".equals(twoLetter) || "ko".equals(twoLetter) || "zh".equals(twoLetter))) {
                    return this.defaultCJKLanguage;
                }
                return fallback;
            }
            return this.defaultCJKLanguage;
        }
        return fallback;
    }

    public void initialize(AnnotatorContext context) throws AnnotatorInitializationException, AnnotatorConfigurationException {
        try {
            String lang = (String)context.getConfigParameterValue(DEFAULT_CJK_LANGUAGE_PARM);
            if (lang != null) {
                this.defaultCJKLanguage = lang;
            }
        }
        catch (AnnotatorContextException e) {
            throw new AnnotatorConfigurationException((Throwable)e);
        }
    }

    public void reconfigure() throws AnnotatorConfigurationException, AnnotatorInitializationException {
    }

    public void destroy() {
    }

    public void typeSystemInit(TypeSystem ts) throws AnnotatorInitializationException, AnnotatorConfigurationException {
        this.languageFeatureCode = QuickLanguageIDAnnotator.getFeatureCodeForName("uima.tcas.DocumentAnnotation:language", ts.getLowLevelTypeSystem());
        this.fallbackLanguageFeatureCode = QuickLanguageIDAnnotator.getFeatureCodeForName("uima.tcas.DocumentAnnotation:fallbackLanguage", ts.getLowLevelTypeSystem());
    }
}

